Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
7f97b8ef
Unverified
Commit
7f97b8ef
authored
Oct 07, 2022
by
Ted Themistokleous
Committed by
GitHub
Oct 07, 2022
Browse files
Merge branch 'simplify_1_mul_div_ops' into divide_by_zero_check
parents
2ba401f0
d1fed367
Changes
448
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
369 additions
and
877 deletions
+369
-877
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
+2
-1
src/targets/gpu/driver/perf.cpp
src/targets/gpu/driver/perf.cpp
+19
-10
src/targets/gpu/driver/run_op.cpp
src/targets/gpu/driver/run_op.cpp
+2
-2
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+13
-1
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+151
-420
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+31
-9
src/targets/gpu/hip.cpp
src/targets/gpu/hip.cpp
+54
-17
src/targets/gpu/include/migraphx/gpu/asinh.hpp
src/targets/gpu/include/migraphx/gpu/asinh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/atan.hpp
src/targets/gpu/include/migraphx/gpu/atan.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/atanh.hpp
src/targets/gpu/include/migraphx/gpu/atanh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/ceil.hpp
src/targets/gpu/include/migraphx/gpu/ceil.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
+12
-0
src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
+4
-2
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+73
-1
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+8
-1
src/targets/gpu/include/migraphx/gpu/cosh.hpp
src/targets/gpu/include/migraphx/gpu/cosh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
+0
-43
src/targets/gpu/include/migraphx/gpu/device/add.hpp
src/targets/gpu/include/migraphx/gpu/device/add.hpp
+0
-50
src/targets/gpu/include/migraphx/gpu/device/add_clip.hpp
src/targets/gpu/include/migraphx/gpu/device/add_clip.hpp
+0
-57
src/targets/gpu/include/migraphx/gpu/device/add_relu.hpp
src/targets/gpu/include/migraphx/gpu/device/add_relu.hpp
+0
-53
No files found.
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
View file @
7f97b8ef
...
...
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
driver
{
double
time_op
(
context
&
ctx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
=
100
);
std
::
pair
<
double
,
double
>
time_op
(
context
&
ictx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
=
100
);
}
// namespace driver
}
// namespace gpu
...
...
src/targets/gpu/driver/perf.cpp
View file @
7f97b8ef
...
...
@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
}
using
milliseconds
=
std
::
chrono
::
duration
<
double
,
std
::
milli
>
;
double
time_op
(
context
&
ctx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
)
std
::
pair
<
double
,
double
>
time_op
(
context
&
ictx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
)
{
// TODO: Use std::ref
migraphx
::
context
gctx
=
ctx
;
auto
output
=
op
.
compute_shape
(
inputs
);
op
.
finalize
(
gctx
,
output
,
inputs
);
migraphx
::
context
ctx
=
ictx
;
auto
&
gctx
=
any_cast
<
migraphx
::
gpu
::
context
>
(
ctx
);
auto
output
=
op
.
compute_shape
(
inputs
);
op
.
finalize
(
ctx
,
output
,
inputs
);
auto
args
=
generate_arguments
(
inputs
);
auto
run
=
[
&
]
{
op
.
compute
(
g
ctx
,
output
,
args
);
g
ctx
.
finish
();
op
.
compute
(
ctx
,
output
,
args
);
ctx
.
finish
();
};
gctx
.
enable_perf_measurement
();
run
();
auto
r
=
range
(
n
);
double
t
=
std
::
accumulate
(
r
.
begin
(),
r
.
end
(),
double
{
0.0
},
[
&
](
auto
x
,
auto
)
{
return
x
+
time
<
milliseconds
>
(
run
);
});
return
t
/
n
;
double
host_time
=
0.0
;
double
device_time
=
0.0
;
for
(
auto
i
:
range
(
n
))
{
(
void
)
i
;
host_time
+=
time
<
milliseconds
>
(
run
);
device_time
+=
gctx
.
get_elapsed_ms
();
}
return
std
::
make_pair
(
host_time
/
n
,
device_time
/
n
);
}
}
// namespace driver
...
...
src/targets/gpu/driver/run_op.cpp
View file @
7f97b8ef
...
...
@@ -43,8 +43,8 @@ struct run_op : action<run_op>
auto
op
=
make_op
(
name
);
if
(
v
.
contains
(
"fields"
))
op
.
from_value
(
v
.
at
(
"fields"
));
double
t
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
t
<<
"ms"
<<
std
::
endl
;
auto
[
host_time
,
device_time
]
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
host_time
<<
"ms"
<<
std
::
endl
;
}
};
...
...
src/targets/gpu/fuse_mlir.cpp
View file @
7f97b8ef
...
...
@@ -61,13 +61,25 @@ struct mlir_conv
MIGRAPHX_REGISTER_OP
(
mlir_conv
);
namespace
{
MIGRAPHX_PRED_MATCHER
(
is_mlir_conv
,
instruction_ref
ins
)
{
if
(
ins
->
name
()
!=
"convolution"
)
return
false
;
value
v
=
ins
->
get_operator
().
to_value
();
auto
group
=
v
.
at
(
"group"
).
to
<
int
>
();
if
(
group
!=
1
)
return
false
;
return
true
;
}
struct
find_conv_pointwise
{
// Find a convolution followed by a pointwise operation.
auto
matcher
()
const
{
auto
convolution
=
match
::
skip
(
match
::
name
(
"contiguous"
))(
match
::
name
(
"convolution"
).
bind
(
"convolution"
));
match
::
skip
(
match
::
name
(
"contiguous"
))(
is_mlir_conv
(
).
bind
(
"convolution"
));
return
match
::
name
(
"pointwise"
)(
match
::
any_of
[
match
::
inputs
()](
convolution
.
bind
(
"x"
)));
}
...
...
src/targets/gpu/fuse_ops.cpp
View file @
7f97b8ef
...
...
@@ -26,30 +26,15 @@
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/add.hpp>
#include <migraphx/gpu/mul.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/device/layernorm.hpp>
#include <migraphx/gpu/device/gelu.hpp>
#include <migraphx/gpu/device/mul_add.hpp>
#include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add_sigmoid.hpp>
#include <migraphx/gpu/device/add_tanh.hpp>
#include <migraphx/gpu/device/mul_add_relu.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/array.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/op/clip.hpp>
#include <cmath>
#include <set>
...
...
@@ -225,95 +210,6 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
contains
({{
0
,
0
},
{
1
,
1
}},
op
.
stride
)
and
contains
({{
1
,
1
}},
op
.
dilation
);
}
struct
hip_triadd
:
ternary_device
<
hip_triadd
,
&
device
::
add
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd
)
struct
hip_triadd_clip
:
quinary_device
<
hip_triadd_clip
,
&
device
::
add_clip
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_clip
)
struct
hip_add_clip
:
quaternary_device
<
hip_add_clip
,
&
device
::
add_clip
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_clip
)
struct
hip_triadd_relu
:
ternary_device
<
hip_triadd_relu
,
&
device
::
add_relu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_relu
)
struct
hip_triadd_sigmoid
:
ternary_device
<
hip_triadd_sigmoid
,
&
device
::
add_sigmoid
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_sigmoid
)
struct
hip_triadd_tanh
:
ternary_device
<
hip_triadd_tanh
,
&
device
::
add_tanh
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_tanh
)
struct
hip_add_relu
:
binary_device
<
hip_add_relu
,
&
device
::
add_relu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_relu
)
struct
hip_add_sigmoid
:
binary_device
<
hip_add_relu
,
&
device
::
add_sigmoid
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_sigmoid
)
struct
hip_add_tanh
:
binary_device
<
hip_add_tanh
,
&
device
::
add_tanh
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_tanh
)
struct
hip_layernorm
:
unary_device
<
hip_layernorm
,
&
device
::
layernorm
>
{
// Empty finalize to skip dimension reduction
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{}
};
MIGRAPHX_REGISTER_OP
(
hip_layernorm
)
struct
hip_triadd_layernorm
:
ternary_device
<
hip_triadd_layernorm
,
&
device
::
triadd_layernorm
>
{
// Empty finalize to skip dimension reduction
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{}
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_layernorm
)
struct
hip_gelu
:
unary_device
<
hip_gelu
,
&
device
::
gelu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_gelu
)
struct
hip_add_gelu
:
binary_device
<
hip_add_gelu
,
&
device
::
add_gelu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_gelu
)
struct
hip_gelu_new
:
unary_device
<
hip_gelu_new
,
&
device
::
gelu_new
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_gelu_new
)
struct
hip_add_gelu_new
:
binary_device
<
hip_add_gelu_new
,
&
device
::
add_gelu_new
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_gelu_new
)
struct
hip_mul_add
:
ternary_device
<
hip_mul_add
,
&
device
::
mul_add
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_mul_add
)
struct
hip_mul_add_relu
:
ternary_device
<
hip_mul_add_relu
,
&
device
::
mul_add_relu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_mul_add_relu
)
void
move_broadcasted_back
(
std
::
vector
<
instruction_ref
>&
args
)
{
// Ensure the last arguments is the broadcasted one
...
...
@@ -337,256 +233,6 @@ void move_standard_front(std::vector<instruction_ref>& args)
auto
gpu_name
(
const
std
::
string
&
s
)
{
return
match
::
name
(
"gpu::"
+
s
);
}
namespace
{
struct
find_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
(
&
gpu_name
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
// We dont fuse for non-standard layouts
if
(
not
x_ins
->
get_shape
().
standard
())
return
;
auto
relements
=
x_ins
->
get_shape
().
lens
().
back
();
if
(
relements
>
1024
or
(
relements
%
4
!=
0
and
relements
>
256
))
return
;
m
.
replace_instruction
(
ins
,
hip_layernorm
{},
x_ins
,
args
.
back
());
}
};
struct
find_triadd_layernorm
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::layernorm"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::triadd"
)(
match
::
used_once
(),
match
::
all_of
[
match
::
inputs
()](
match
::
standard_shape
()))));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
triadd
=
ins
->
inputs
().
front
();
m
.
replace_instruction
(
ins
,
hip_triadd_layernorm
{},
triadd
->
inputs
());
}
};
struct
find_gelu
{
auto
matcher
()
const
{
return
match
::
gelu_erf
(
&
gpu_name
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
m
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
}
};
struct
find_add_gelu
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::gelu"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::add"
).
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_add_gelu
{},
args
);
}
};
struct
find_gelu_new
{
bool
fast_math
=
true
;
auto
matcher
()
const
{
return
match
::
gelu_tanh
(
&
gpu_name
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
if
(
fast_math
)
m
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
else
m
.
replace_instruction
(
ins
,
hip_gelu_new
{},
x_ins
,
args
.
back
());
}
};
struct
find_add_gelu_new
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::gelu_new"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::add"
).
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_add_gelu_new
{},
args
);
}
};
struct
find_add_clip
{
auto
matcher
()
const
{
return
match
::
name
(
std
::
unordered_set
<
std
::
string
>
{
"gpu::clip"
,
"gpu::clipped_relu"
})(
match
::
arg
(
0
)(
match
::
any_of
(
match
::
name
(
"gpu::add"
),
match
::
name
(
"gpu::triadd"
),
match
::
any_of
[
match
::
inputs
()](
match
::
standard_shape
()))
.
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
ins_args
=
ins
->
inputs
();
auto
add_args
=
add_ins
->
inputs
();
move_standard_front
(
add_args
);
move_broadcasted_back
(
add_args
);
// Use the allocation from the clip operator
add_args
.
pop_back
();
add_args
.
insert
(
add_args
.
end
(),
std
::
next
(
ins_args
.
begin
()),
ins_args
.
end
());
if
(
add_ins
->
name
()
==
"gpu::add"
)
m
.
replace_instruction
(
ins
,
hip_add_clip
{},
add_args
);
else
if
(
add_ins
->
name
()
==
"gpu::triadd"
)
m
.
replace_instruction
(
ins
,
hip_triadd_clip
{},
add_args
);
}
};
struct
find_add_unary
{
std
::
string
op_name
;
operation
binary_add_op
;
operation
ternary_add_op
;
auto
matcher
()
const
{
return
match
::
name
(
op_name
)(
match
::
arg
(
0
)(
match
::
used_once
(),
match
::
any_of
(
match
::
name
(
"gpu::add"
),
match
::
name
(
"gpu::triadd"
),
match
::
any_of
(
match
::
name
(
"@literal"
),
match
::
any_of
[
match
::
inputs
()](
match
::
standard_shape
())))
.
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
inputs
().
back
();
if
(
add_ins
->
name
()
==
"gpu::add"
)
m
.
replace_instruction
(
ins
,
binary_add_op
,
args
);
else
if
(
add_ins
->
name
()
==
"gpu::triadd"
)
m
.
replace_instruction
(
ins
,
ternary_add_op
,
args
);
}
};
struct
find_triadd
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"gpu::add"
)(
match
::
used_once
()).
bind
(
"add"
),
match
::
any
(
match
::
any_of
(
match
::
name
(
"@literal"
),
match
::
any_of
[
match
::
inputs
()](
match
::
standard_shape
())))
.
bind
(
"input"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
input_ins
=
r
.
instructions
[
"input"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
auto
is_broadcasted
=
[](
auto
arg
)
{
return
arg
->
get_shape
().
broadcasted
();
};
if
(
std
::
count_if
(
args
.
begin
(),
args
.
end
(),
is_broadcasted
)
>
2
)
return
;
args
.
insert
(
args
.
begin
(),
input_ins
);
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_triadd
{},
args
);
}
};
struct
find_mul_add
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"gpu::mul"
)(
match
::
used_once
()).
bind
(
"mul"
),
match
::
any
().
bind
(
"b"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
mul_ins
=
r
.
instructions
[
"mul"
];
auto
b_ins
=
r
.
instructions
[
"b"
];
auto
ins
=
r
.
result
;
auto
args
=
mul_ins
->
inputs
();
assert
(
mul_ins
!=
b_ins
);
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
insert
(
std
::
prev
(
args
.
end
()),
b_ins
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_mul_add
{},
args
);
}
};
struct
find_mul_add_relu
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::relu"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::mul_add"
)(
match
::
used_once
()).
bind
(
"mul_add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
mul_add_ins
=
r
.
instructions
[
"mul_add"
];
auto
ins
=
r
.
result
;
auto
args
=
mul_add_ins
->
inputs
();
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_mul_add_relu
{},
args
);
}
};
struct
miopen_fusion
{
struct
fuse_op_data
...
...
@@ -827,13 +473,14 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
m
.
replace_instruction
(
ins
,
cb
,
input_ins
,
weights_ins
,
old_ws_ins
,
bias_ins
,
alloc_ins
);
}
inline
auto
precompile_name
(
std
::
string
s
)
// NOLINT
template
<
class
...
Strings
>
inline
auto
precompile_name
(
Strings
...
names
)
// NOLINT
{
return
match
::
make_basic_pred_matcher
([
=
](
instruction_ref
ins
)
{
if
(
ins
->
name
()
!=
"gpu::precompile_op"
)
return
false
;
auto
op
=
from_value
<
operation
>
(
ins
->
get_operator
().
to_value
().
at
(
"op"
));
return
(
op
.
name
()
==
s
);
return
(
contains
({
names
...},
op
.
name
()
)
);
});
}
...
...
@@ -902,14 +549,70 @@ struct find_conv_pointwise
}
};
struct
find_gemm_
add
struct
find_gemm_
pointwise
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::add"
)(
match
::
all_of
[
match
::
inputs
()](
match
::
standard_shape
()),
match
::
either_arg
(
0
,
1
)(
match
::
used_once
().
bind
(
"c"
),
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
)).
bind
(
"gemm"
)));
return
precompile_name
(
"pointwise"
)(
match
::
nargs
(
3
),
match
::
either_arg
(
0
,
1
)(
match
::
any_of
(
match
::
standard_shape
(),
match
::
is_constant
()).
bind
(
"c"
),
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
),
match
::
used_once
()).
bind
(
"gemm"
)));
}
// TODO: Move to matcher.hpp
static
auto
match_param
(
const
std
::
string
&
name
)
{
return
match
::
make_basic_pred_matcher
([
=
](
auto
ins
)
{
if
(
ins
->
name
()
!=
"@param"
)
return
false
;
auto
p
=
any_cast
<
builtin
::
param
>
(
ins
->
get_operator
());
return
p
.
parameter
==
name
;
});
}
template
<
class
M
>
static
auto
match_mul_const
(
M
m
,
const
std
::
string
&
var
)
{
return
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"@literal"
).
bind
(
var
),
m
))
.
bind
(
var
+
"_mul"
);
}
static
auto
match_add
(
const
std
::
string
&
input
,
const
std
::
string
&
output
)
{
auto
param
=
match
::
name
(
"@param"
);
auto
add
=
match
::
name
(
"add"
)(
match
::
args
(
param
,
param
));
auto
inner_mul
=
match
::
any_of
(
match_mul_const
(
match_param
(
input
),
"alpha"
),
match_mul_const
(
match_param
(
output
),
"beta"
));
auto
mul_add
=
match
::
name
(
"add"
)(
match
::
either_arg
(
0
,
1
)(
inner_mul
,
param
));
auto
add_mul
=
match_mul_const
(
add
,
"gamma"
);
return
match
::
name
(
"@return"
)(
match
::
args
(
match
::
any_of
(
add
,
mul_add
,
add_mul
)));
}
static
float
get_float
(
instruction_ref
ins
)
{
return
ins
->
get_literal
().
at
<
float
>
();
}
template
<
class
Gemm
>
static
bool
update_gemm
(
Gemm
&
gemm
,
module_ref
pm
,
unsigned
input
)
{
auto
names
=
pm
->
get_parameter_names
();
if
(
names
.
size
()
!=
2
)
return
false
;
std
::
sort
(
names
.
begin
(),
names
.
end
());
unsigned
output
=
input
==
0
?
1
:
0
;
auto
mr
=
match
::
match_instruction
(
*
pm
,
std
::
prev
(
pm
->
end
()),
match_add
(
names
[
input
],
names
[
output
]));
if
(
mr
.
result
==
pm
->
end
())
return
false
;
if
(
contains
(
mr
.
instructions
,
"alpha_mul"
))
gemm
.
alpha
*=
get_float
(
mr
.
instructions
[
"alpha"
]);
else
if
(
contains
(
mr
.
instructions
,
"beta_mul"
))
gemm
.
beta
*=
get_float
(
mr
.
instructions
[
"beta"
]);
else
if
(
contains
(
mr
.
instructions
,
"gamma_mul"
))
{
gemm
.
alpha
*=
get_float
(
mr
.
instructions
[
"gamma"
]);
gemm
.
beta
*=
get_float
(
mr
.
instructions
[
"gamma"
]);
}
return
true
;
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
...
...
@@ -923,69 +626,85 @@ struct find_gemm_add
// Already fused gemm
if
(
not
float_equal
(
gemm
.
beta
,
0
))
return
;
gemm
.
beta
=
1
;
auto
inputs
=
gemm_ins
->
inputs
();
inputs
.
pop_back
();
auto
copy_ins
=
c_ins
;
if
(
not
update_gemm
(
gemm
,
ins
->
module_inputs
().
front
(),
ins
->
inputs
().
front
()
==
gemm_ins
?
0
:
1
))
return
;
//
Insert copy
if
(
ins
==
m
.
end
()
or
c_ins
->
outputs
().
size
()
>
1
or
c_ins
->
inputs
().
empty
())
//
const-fold input if not standard shape since rocblas can't handle it
if
(
not
c_ins
->
get_shape
().
standard
())
{
copy_ins
=
m
.
insert_instruction
(
ins
,
hip_copy
{},
c_ins
,
ins
->
inputs
().
back
());
auto
c
=
make_op
(
"contiguous"
);
auto
l
=
c
.
compute
(
c
.
compute_shape
({
c_ins
->
get_shape
()}),
{
c_ins
->
eval
()});
c_ins
=
m
.
add_literal
(
l
.
get_shape
(),
l
.
data
());
}
inputs
.
push_back
(
copy_ins
);
inputs
.
push_back
(
copy_ins
);
gemm
.
beta
=
1
;
auto
inputs
=
gemm_ins
->
inputs
();
inputs
.
pop_back
();
inputs
.
push_back
(
c_ins
);
inputs
.
push_back
(
ins
->
inputs
().
back
());
m
.
replace_instruction
(
ins
,
gemm
,
inputs
);
}
};
auto
pointwise_name
(
const
std
::
string
&
s
)
{
return
precompile_name
(
"pointwise"
)(
match
::
make_basic_pred_matcher
([
=
](
auto
ins
)
{
module_ref
pm
=
ins
->
module_inputs
().
front
();
auto
n
=
std
::
count_if
(
pm
->
begin
(),
pm
->
end
(),
[
&
](
auto
&
i
)
{
return
i
.
name
()
==
s
;
});
if
(
n
!=
1
)
return
false
;
return
std
::
all_of
(
pm
->
begin
(),
pm
->
end
(),
[
&
](
auto
&
i
)
{
return
starts_with
(
i
.
name
(),
"@"
)
or
i
.
name
()
==
s
;
});
}));
}
struct
find_gemm_pointwise
struct
find_contiguous_tranpose_gemm
{
auto
matcher
()
const
{
return
pointwise_name
(
"add"
)(
match
::
nargs
(
3
),
match
::
all_of
[
match
::
inputs
()](
match
::
standard_shape
()),
match
::
either_arg
(
0
,
1
)(
match
::
used_once
().
bind
(
"c"
),
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
)).
bind
(
"gemm"
)));
return
match
::
name
(
"gpu::contiguous"
)(
match
::
arg
(
0
)(
match
::
name
(
"transpose"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::gemm"
)(
match
::
used_once
()).
bind
(
"gemm"
)))
.
bind
(
"transpose"
)));
}
template
<
class
Vector
>
static
bool
is_swapped
(
const
Vector
&
perm
,
std
::
size_t
i
,
std
::
size_t
j
)
{
if
(
i
>=
perm
.
size
()
or
j
>=
perm
.
size
())
return
false
;
auto
perm2
=
perm
;
std
::
iota
(
perm2
.
begin
(),
perm2
.
end
(),
0
);
std
::
swap
(
perm2
[
i
],
perm2
[
j
]);
return
perm2
==
perm
;
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
gemm_ins
=
r
.
instructions
[
"gemm"
];
auto
c_ins
=
r
.
instructions
[
"c"
];
auto
ins
=
r
.
result
;
auto
gemm
=
r
.
instructions
[
"gemm"
];
auto
alloc
=
gemm
->
inputs
().
back
();
auto
transpose
=
r
.
instructions
[
"transpose"
];
auto
perm
=
transpose
->
get_operator
().
to_value
()[
"permutation"
].
to_vector
<
int64_t
>
();
auto
iperm
=
invert_permutation
(
perm
);
auto
gemm
=
any_cast
<
rocblas_gemm
<
op
::
dot
>>
(
gemm_ins
->
get_operator
());
if
(
perm
.
size
()
<
3
)
return
;
// Already fused gemm
if
(
not
float_equal
(
gemm
.
beta
,
0
))
if
(
not
is_swapped
(
perm
,
perm
.
size
()
-
3
,
perm
.
size
()
-
2
))
return
;
auto
inputs
=
gemm_ins
->
inputs
();
inputs
.
pop_back
();
auto
lens
=
gemm
->
get_shape
().
lens
();
if
(
lens
.
size
()
>
3
and
not
std
::
all_of
(
lens
.
begin
(),
lens
.
end
()
-
3
,
[](
auto
i
)
{
return
i
==
1
;
}))
return
;
inputs
.
push_back
(
c_ins
);
inputs
.
push_back
(
ins
->
inputs
().
back
())
;
auto
gemmv
=
gemm
->
get_operator
().
to_value
(
);
gemmv
[
"trans_batch"
]
=
1
;
gemm
.
beta
=
1
;
m
.
replace_instruction
(
ins
,
gemm
,
inputs
);
auto
s
=
shape
{
alloc
->
get_shape
().
type
(),
reorder_dims
(
alloc
->
get_shape
().
lens
(),
iperm
)};
auto
new_alloc
=
m
.
insert_instruction
(
gemm
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
auto
alloc_transpose
=
m
.
insert_instruction
(
gemm
,
make_op
(
"transpose"
,
{{
"permutation"
,
perm
}}),
new_alloc
);
auto
inputs
=
gemm
->
inputs
();
inputs
.
back
()
=
alloc_transpose
;
auto
new_gemm
=
m
.
insert_instruction
(
gemm
,
make_op
(
"gpu::gemm"
,
gemmv
),
inputs
);
auto
gemm_transpoe
=
m
.
insert_instruction
(
gemm
,
transpose
->
get_operator
(),
new_gemm
);
m
.
replace_instruction
(
ins
,
gemm_transpoe
);
}
};
...
...
@@ -1041,29 +760,41 @@ struct find_contiguous_pointwise
}
};
struct
find_layernorm_pointwise
{
auto
matcher
()
const
{
return
precompile_name
(
"pointwise"
)(
match
::
arg
(
0
)(
precompile_name
(
"gpu::prelayernorm"
,
"gpu::preadd_layernorm"
).
bind
(
"layernorm"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
layernorm
=
r
.
instructions
[
"layernorm"
];
auto
*
pm
=
ins
->
module_inputs
().
front
();
if
(
not
layernorm
->
module_inputs
().
empty
())
return
;
auto
inputs
=
layernorm
->
inputs
();
inputs
.
pop_back
();
inputs
.
insert
(
inputs
.
end
(),
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
());
m
.
replace_instruction
(
ins
,
layernorm
->
get_operator
(),
inputs
,
{
pm
});
}
};
void
fuse_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_contiguous_pointwise
{}
,
find_gelu
{},
find_gelu_new
{
fast_math
}
);
match
::
find_matches
(
m
,
find_contiguous_pointwise
{});
run_passes
(
m
,
{
dead_code_elimination
{}});
match
::
find_matches
(
m
,
find_triadd
{});
match
::
find_matches
(
m
,
find_layernorm
{},
find_conv_pointwise
{
ctx
},
find_conv_bias_relu
{
ctx
},
find_conv_bias
{
ctx
},
find_add_gelu
{},
find_add_gelu_new
{},
find_mul_add
{},
find_mul_add_relu
{},
find_add_unary
{
"gpu::relu"
,
hip_add_relu
{},
hip_triadd_relu
{}},
find_add_unary
{
"gpu::sigmoid"
,
hip_add_sigmoid
{},
hip_triadd_sigmoid
{}},
find_add_unary
{
"gpu::tanh"
,
hip_add_tanh
{},
hip_triadd_tanh
{}},
find_add_clip
{});
match
::
find_matches
(
m
,
find_conv_pointwise
{
ctx
},
find_conv_bias_relu
{
ctx
},
find_conv_bias
{
ctx
});
run_passes
(
m
,
{
dead_code_elimination
{}});
match
::
find_matches
(
m
,
find_triadd_layernorm
{},
find_gemm_add
{},
find_layernorm_pointwise
{},
find_gemm_pointwise
{},
find_contiguous_tranpose_gemm
{},
find_commutative_broadcast
{});
match
::
find_matches
(
m
,
find_contiguous
{});
}
...
...
src/targets/gpu/gemm_impl.cpp
View file @
7f97b8ef
...
...
@@ -24,6 +24,7 @@
#include <rocblas.h>
#include <migraphx/gpu/gemm_impl.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/permutation.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -67,6 +68,19 @@ void blas_shape(const shape& s)
MIGRAPHX_THROW
(
"GPU_GEMM: Batch dimension is not collapsible"
);
}
shape
transpose_batch
(
const
shape
&
s
,
unsigned
trans_batch
)
{
if
(
trans_batch
==
0
)
return
s
;
if
(
s
.
lens
().
size
()
<
3
)
return
s
;
auto
batch
=
s
.
lens
().
size
()
-
3
;
std
::
vector
<
int64_t
>
perm
(
s
.
lens
().
size
());
std
::
iota
(
perm
.
begin
(),
perm
.
end
(),
0
);
std
::
swap
(
perm
[
batch
],
perm
[
batch
+
trans_batch
]);
return
shape
::
from_permutation
(
s
.
type
(),
s
.
lens
(),
perm
);
}
template
<
class
R
,
class
...
Ts
,
class
...
Us
>
R
rocblas_invoke
(
R
(
*
f
)(
Ts
...),
Us
...
xs
)
{
...
...
@@ -97,6 +111,12 @@ void gemm_impl(context& ctx,
bool
int8_x4_format
,
bool
compute_fp32
)
{
const
bool
is_3inputs
=
(
args
.
size
()
==
4
);
if
(
not
is_3inputs
)
{
beta
=
0
;
}
bool
transa
=
is_transposed
(
args
[
0
].
get_shape
());
bool
transb
=
is_transposed
(
args
[
1
].
get_shape
());
auto
n_dim
=
output_shape
.
lens
().
size
();
...
...
@@ -105,12 +125,8 @@ void gemm_impl(context& ctx,
rocblas_int
lda
=
args
[
0
].
get_shape
().
strides
()[
transa
?
dim_1
:
dim_0
];
rocblas_int
ldb
=
args
[
1
].
get_shape
().
strides
()[
transb
?
dim_1
:
dim_0
];
rocblas_int
ldc
=
args
[
2
].
get_shape
().
strides
()[
dim_0
];
rocblas_int
ldd
=
is_3inputs
?
args
[
3
].
get_shape
().
strides
()[
dim_0
]
:
ldc
;
bool
is_3inputs
=
(
args
.
size
()
==
4
);
if
(
!
is_3inputs
)
{
beta
=
0
;
}
rocblas_datatype
arg_type
=
get_type
(
args
[
0
].
get_shape
().
type
());
auto
output_type
=
arg_type
;
if
(
output_type
==
rocblas_datatype_i8_r
)
...
...
@@ -160,8 +176,13 @@ void gemm_impl(context& ctx,
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
if
(
num_matrices
==
1
)
if
(
num_matrices
==
1
or
(
num_matrices
>
1
and
get_batch_stride
(
args
[
1
])
==
0
)
)
{
// If the batch dimension of B is broadcasted, then we can
// multiply m by the batch_size and use rocblas_gemm_ex
// instead of rocblas_gemm_strided_batched_ex.
m
*=
num_matrices
;
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
...
...
@@ -186,7 +207,7 @@ void gemm_impl(context& ctx,
ldc
,
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
]),
output_type
,
ld
c
,
ld
d
,
compute_type
,
rocblas_gemm_algo_standard
,
0
,
...
...
@@ -197,6 +218,7 @@ void gemm_impl(context& ctx,
auto
a_stride
=
get_batch_stride
(
args
[
0
]);
auto
b_stride
=
get_batch_stride
(
args
[
1
]);
auto
c_stride
=
get_batch_stride
(
args
[
2
]);
auto
d_stride
=
is_3inputs
?
get_batch_stride
(
args
[
3
])
:
c_stride
;
rocblas_invoke
(
&
rocblas_gemm_strided_batched_ex
,
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
...
...
@@ -220,8 +242,8 @@ void gemm_impl(context& ctx,
c_stride
,
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
]),
output_type
,
ld
c
,
c
_stride
,
ld
d
,
d
_stride
,
num_matrices
,
compute_type
,
rocblas_gemm_algo_standard
,
...
...
src/targets/gpu/hip.cpp
View file @
7f97b8ef
...
...
@@ -23,13 +23,13 @@
*/
#include <migraphx/gpu/hip.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <miopen/miopen.h>
#include <memory>
#include <mutex>
#include <vector>
namespace
migraphx
{
...
...
@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr)
return
result
;
}
hip_ptr
allocate_gpu
(
std
::
size_t
sz
,
bool
host
=
false
)
struct
host_ptr_cache
{
std
::
unordered_map
<
void
*
,
std
::
weak_ptr
<
void
>>
cache
;
std
::
mutex
m
;
std
::
shared_ptr
<
void
>
get
(
void
*
ptr
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
m
);
auto
it
=
cache
.
find
(
ptr
);
if
(
it
!=
cache
.
end
())
return
it
->
second
.
lock
();
return
nullptr
;
}
void
put
(
const
std
::
shared_ptr
<
void
>&
p
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
m
);
cache
[
p
.
get
()]
=
p
;
}
};
static
host_ptr_cache
&
get_host_ptr_cache
()
{
static
host_ptr_cache
cache
;
return
cache
;
}
std
::
shared_ptr
<
void
>
allocate_gpu
(
std
::
size_t
sz
,
bool
host
=
false
)
{
if
(
sz
>
get_available_gpu_memory
())
MIGRAPHX_THROW
(
"Memory not available to allocate buffer: "
+
std
::
to_string
(
sz
));
void
*
result
=
nullptr
;
auto
status
=
host
?
hipHostMalloc
(
&
result
,
sz
)
:
hipMalloc
(
&
result
,
sz
);
void
*
alloc_ptr
=
nullptr
;
auto
status
=
host
?
hipHostMalloc
(
&
alloc_ptr
,
sz
)
:
hipMalloc
(
&
alloc_ptr
,
sz
);
if
(
status
!=
hipSuccess
)
{
if
(
host
)
...
...
@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
else
return
allocate_gpu
(
sz
,
true
);
}
assert
(
result
!=
nullptr
);
return
hip_ptr
{
result
};
assert
(
alloc_ptr
!=
nullptr
);
std
::
shared_ptr
<
void
>
result
=
share
(
hip_ptr
{
alloc_ptr
});
if
(
host
)
{
get_host_ptr_cache
().
put
(
result
);
}
return
result
;
}
hip_host_ptr
register_on_gpu
(
void
*
ptr
,
std
::
size_t
sz
)
std
::
shared_ptr
<
void
>
register_on_gpu
(
void
*
ptr
,
std
::
size_t
sz
)
{
std
::
shared_ptr
<
void
>
result
=
get_host_ptr_cache
().
get
(
ptr
);
if
(
result
)
{
return
result
;
}
auto
status
=
hipHostRegister
(
ptr
,
sz
,
hipHostRegisterMapped
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Gpu register failed: "
+
hip_error
(
status
));
return
hip_host_ptr
{
ptr
};
result
=
share
(
hip_host_ptr
{
ptr
});
get_host_ptr_cache
().
put
(
result
);
return
result
;
}
template
<
class
T
>
...
...
@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return
result
;
}
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
std
::
shared_ptr
<
void
>
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
{
gpu_sync
();
auto
result
=
allocate_gpu
(
sz
,
host
);
...
...
@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x)
argument
allocate_gpu
(
const
shape
&
s
,
bool
host
)
{
auto
p
=
share
(
allocate_gpu
(
s
.
bytes
()
+
1
,
host
)
)
;
auto
p
=
allocate_gpu
(
s
.
bytes
()
+
1
,
host
);
return
{
s
,
[
p
]()
mutable
{
return
reinterpret_cast
<
char
*>
(
p
.
get
());
}};
}
argument
register_on_gpu
(
const
argument
&
arg
)
{
auto
arg_shared
=
arg
.
share
();
auto
p
=
share
(
register_on_gpu
(
arg_shared
.
data
(),
arg_shared
.
get_shape
().
bytes
()));
return
{
arg_shared
.
get_shape
(),
[
p
,
a
=
std
::
move
(
arg_shared
)]()
mutable
{
return
get_device_ptr
(
p
.
get
());
}};
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
auto
p
=
register_on_gpu
(
arg_shared
.
data
(),
arg_shared
.
get_shape
().
bytes
());
return
{
arg_shared
.
get_shape
(),
[
p
,
a
=
std
::
move
(
arg_shared
)]()
mutable
{
return
get_device_ptr
(
p
.
get
());
}};
}
argument
to_gpu
(
const
argument
&
arg
,
bool
host
)
{
auto
p
=
share
(
write_to_gpu
(
arg
.
data
(),
arg
.
get_shape
().
bytes
(),
host
)
)
;
auto
p
=
write_to_gpu
(
arg
.
data
(),
arg
.
get_shape
().
bytes
(),
host
);
return
{
arg
.
get_shape
(),
p
};
}
...
...
src/targets/gpu/include/migraphx/gpu/asinh.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/asinh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_asinh
:
unary_device
<
hip_asinh
,
device
::
asinh
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/atan.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/atan.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_atan
:
unary_device
<
hip_atan
,
device
::
atan
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/atanh.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/atanh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_atanh
:
unary_device
<
hip_atanh
,
device
::
atanh
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/ceil.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CEIL_HPP
#define MIGRAPHX_GUARD_RTGLIB_CEIL_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/ceil.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_ceil
:
unary_device
<
hip_ceil
,
device
::
ceil
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
View file @
7f97b8ef
...
...
@@ -25,6 +25,7 @@
#define MIGRAPHX_GUARD_GPU_COMPILE_GEN_HPP
#include <migraphx/config.hpp>
#include <migraphx/module_ref.hpp>
#include <string>
#include <unordered_map>
#include <vector>
...
...
@@ -35,6 +36,9 @@ inline namespace MIGRAPHX_INLINE_NS {
struct
shape
;
namespace
gpu
{
struct
context
;
namespace
gen
{
struct
vectorize
...
...
@@ -42,6 +46,10 @@ struct vectorize
std
::
size_t
size
=
1
;
std
::
size_t
axis
=
0
;
static
vectorize
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
);
static
vectorize
elements
(
context
&
ctx
,
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
);
static
vectorize
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
,
const
std
::
vector
<
std
::
size_t
>&
sizes
);
std
::
string
str
()
const
;
};
struct
preload
...
...
@@ -62,6 +70,10 @@ std::string make_transformer_args(Ts... xs)
return
make_transformer_args
({
xs
.
str
()...});
}
std
::
string
generate_pointwise
(
const
module
&
pm
,
const
std
::
string
&
name
);
std
::
string
generate_name_from_ops
(
const
module
&
m
);
}
// namespace gen
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
View file @
7f97b8ef
...
...
@@ -24,8 +24,9 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
#include <migraphx/
gpu
/concat.hpp>
#include <migraphx/
op
/concat.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/serialize.hpp>
namespace
migraphx
{
namespace
gpu
{
...
...
@@ -36,7 +37,8 @@ struct concat_gpu_optimization
std
::
string
allocate
()
const
{
return
"hip::allocate"
;
}
migraphx
::
op
::
concat
get_concat
(
const
migraphx
::
operation
&
op
)
const
{
return
migraphx
::
any_cast
<
migraphx
::
gpu
::
hip_concat
>
(
op
).
op
;
auto
v
=
op
.
to_value
();
return
from_value
<
migraphx
::
op
::
concat
>
(
v
.
at
(
"op"
));
}
};
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
7f97b8ef
...
...
@@ -197,7 +197,9 @@ struct hip_device
struct
context
{
context
(
std
::
size_t
device_id
=
0
,
std
::
size_t
n
=
value_of
(
MIGRAPHX_NSTREAMS
{},
1
))
:
current_device
(
std
::
make_shared
<
hip_device
>
(
device_id
,
n
))
:
current_device
(
std
::
make_shared
<
hip_device
>
(
device_id
,
n
)),
begin_event
(
create_event
()),
finish_event
(
create_event
())
{
}
...
...
@@ -244,6 +246,15 @@ struct context
return
hip_event_ptr
{
event
};
}
static
hip_event_ptr
create_event_for_timing
()
{
hipEvent_t
event
;
auto
status
=
hipEventCreate
(
&
event
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to create event"
);
return
hip_event_ptr
{
event
};
}
value
to_value
()
const
{
value
result
;
...
...
@@ -265,12 +276,73 @@ struct context
this
->
current_device
=
std
::
make_shared
<
hip_device
>
(
0
,
n_streams
);
}
void
wait_for
(
any_ptr
queue
)
{
auto
status
=
hipEventRecord
(
begin_event
.
get
(),
queue
.
get
<
hipStream_t
>
());
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"failed to record "
+
hip_error
(
status
));
get_stream
().
wait
(
begin_event
.
get
());
}
void
finish_on
(
any_ptr
queue
)
{
get_stream
().
record
(
finish_event
.
get
());
auto
status
=
hipStreamWaitEvent
(
queue
.
get
<
hipStream_t
>
(),
finish_event
.
get
(),
0
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to wait on event "
+
hip_error
(
status
));
}
any_ptr
get_queue
()
{
return
get_stream
().
get
();
}
void
enable_perf_measurement
(
bool
b
=
true
)
{
if
(
b
)
{
start_event
=
create_event_for_timing
();
stop_event
=
create_event_for_timing
();
get_stream
().
record
(
start_event
.
get
());
get_stream
().
record
(
stop_event
.
get
());
}
else
{
start_event
=
nullptr
;
stop_event
=
nullptr
;
}
measure_perf
=
b
;
}
std
::
pair
<
hipEvent_t
,
hipEvent_t
>
get_perf_events
()
const
{
if
(
measure_perf
)
return
std
::
make_pair
(
start_event
.
get
(),
stop_event
.
get
());
return
std
::
make_pair
(
nullptr
,
nullptr
);
}
float
get_elapsed_ms
()
const
{
float
result
=
0
;
if
(
start_event
!=
nullptr
and
stop_event
!=
nullptr
)
{
auto
status
=
hipEventElapsedTime
(
&
result
,
start_event
.
get
(),
stop_event
.
get
());
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed hipEventElapsedTime: "
+
hip_error
(
status
));
}
return
result
;
}
private:
// TODO: Make this a vector to support multiple devices
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
bool
measure_perf
=
false
;
// for event perf timing
shared
<
hip_event_ptr
>
start_event
=
nullptr
;
shared
<
hip_event_ptr
>
stop_event
=
nullptr
;
// for stream syncronization
shared
<
hip_event_ptr
>
begin_event
=
nullptr
;
shared
<
hip_event_ptr
>
finish_event
=
nullptr
;
};
inline
void
migraphx_to_value
(
value
&
v
,
const
context
&
ctx
)
{
v
=
ctx
.
to_value
();
}
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
7f97b8ef
...
...
@@ -39,6 +39,10 @@ struct miopen_convolution
op
::
convolution
op
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
value
::
binary
solution_object
{};
shared
<
miopen_solution
>
solution_ptr
=
nullptr
;
#endif
uint64_t
solution_id
=
0
;
template
<
class
Self
,
class
F
>
...
...
@@ -49,6 +53,9 @@ struct miopen_convolution
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
op
.
group
,
"group"
),
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
#ifdef MIGRAPHX_HAS_FIND_2_API
f
(
self
.
solution_object
,
"solution_object"
),
#endif
f
(
self
.
solution_id
,
"solution_id"
));
}
...
...
@@ -57,7 +64,7 @@ struct miopen_convolution
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>
&
inputs
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
...
...
src/targets/gpu/include/migraphx/gpu/cosh.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_COSH_HPP
#define MIGRAPHX_GUARD_RTGLIB_COSH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/cosh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_cosh
:
unary_device
<
hip_cosh
,
device
::
cosh
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
acosh
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/add.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
add
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
);
void
add
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
,
const
argument
&
arg3
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/add_clip.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
add_clip
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
,
const
argument
&
min_arg
,
const
argument
&
max_arg
);
void
add_clip
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
,
const
argument
&
arg3
,
const
argument
&
min_arg
,
const
argument
&
max_arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/add_relu.hpp
deleted
100644 → 0
View file @
2ba401f0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
add_relu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
);
void
add_relu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
,
const
argument
&
arg3
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
Prev
1
…
7
8
9
10
11
12
13
14
15
…
23
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment