Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
5a14c0bf
Commit
5a14c0bf
authored
Oct 19, 2022
by
umangyadav
Browse files
Merge branch 'develop' into workspace_size
parents
cb01e280
5fa42993
Changes
319
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
368 additions
and
1100 deletions
+368
-1100
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+20
-4
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+13
-426
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+6
-1
src/targets/gpu/include/migraphx/gpu/acos.hpp
src/targets/gpu/include/migraphx/gpu/acos.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/acosh.hpp
src/targets/gpu/include/migraphx/gpu/acosh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/asin.hpp
src/targets/gpu/include/migraphx/gpu/asin.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/asinh.hpp
src/targets/gpu/include/migraphx/gpu/asinh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/atan.hpp
src/targets/gpu/include/migraphx/gpu/atan.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/atanh.hpp
src/targets/gpu/include/migraphx/gpu/atanh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/batch_norm_inference.hpp
...targets/gpu/include/migraphx/gpu/batch_norm_inference.hpp
+0
-61
src/targets/gpu/include/migraphx/gpu/ceil.hpp
src/targets/gpu/include/migraphx/gpu/ceil.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/clip.hpp
src/targets/gpu/include/migraphx/gpu/clip.hpp
+0
-61
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
+7
-0
src/targets/gpu/include/migraphx/gpu/concat.hpp
src/targets/gpu/include/migraphx/gpu/concat.hpp
+0
-61
src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
+4
-2
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+26
-2
src/targets/gpu/include/migraphx/gpu/convert.hpp
src/targets/gpu/include/migraphx/gpu/convert.hpp
+0
-63
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+292
-16
src/targets/gpu/include/migraphx/gpu/cosh.hpp
src/targets/gpu/include/migraphx/gpu/cosh.hpp
+0
-42
src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
+0
-67
No files found.
src/targets/gpu/fuse_mlir.cpp
View file @
5a14c0bf
...
@@ -49,7 +49,7 @@ struct mlir_conv
...
@@ -49,7 +49,7 @@ struct mlir_conv
std
::
string
name
()
const
{
return
"gpu::mlir_conv"
;
}
std
::
string
name
()
const
{
return
"gpu::mlir_conv"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
const
std
::
vector
<
module_ref
>&
mods
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
const
std
::
vector
<
module_ref
>&
mods
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
standar
d
();
check_shapes
{
inputs
,
*
this
}.
packed_or_broadcaste
d
();
if
(
mods
.
size
()
!=
1
)
if
(
mods
.
size
()
!=
1
)
MIGRAPHX_THROW
(
"should have one submodule."
);
MIGRAPHX_THROW
(
"should have one submodule."
);
if
(
inputs
.
size
()
<
2
)
if
(
inputs
.
size
()
<
2
)
...
@@ -61,13 +61,28 @@ struct mlir_conv
...
@@ -61,13 +61,28 @@ struct mlir_conv
MIGRAPHX_REGISTER_OP
(
mlir_conv
);
MIGRAPHX_REGISTER_OP
(
mlir_conv
);
namespace
{
namespace
{
MIGRAPHX_PRED_MATCHER
(
is_mlir_conv
,
instruction_ref
ins
)
{
if
(
ins
->
name
()
!=
"convolution"
)
return
false
;
value
v
=
ins
->
get_operator
().
to_value
();
auto
group
=
v
.
at
(
"group"
).
to
<
int
>
();
if
(
group
!=
1
)
return
false
;
// Avoid MLIR assertion: Index < Length && "Invalid index!"
if
(
ins
->
get_shape
().
lens
().
size
()
!=
4
)
return
false
;
return
true
;
}
struct
find_conv_pointwise
struct
find_conv_pointwise
{
{
// Find a convolution followed by a pointwise operation.
// Find a convolution followed by a pointwise operation.
auto
matcher
()
const
auto
matcher
()
const
{
{
auto
convolution
=
auto
convolution
=
match
::
skip
(
match
::
name
(
"contiguous"
))(
match
::
name
(
"convolution"
).
bind
(
"convolution"
));
match
::
skip
(
match
::
name
(
"contiguous"
))(
is_mlir_conv
(
).
bind
(
"convolution"
));
return
match
::
name
(
"pointwise"
)(
match
::
any_of
[
match
::
inputs
()](
convolution
.
bind
(
"x"
)));
return
match
::
name
(
"pointwise"
)(
match
::
any_of
[
match
::
inputs
()](
convolution
.
bind
(
"x"
)));
}
}
...
@@ -84,9 +99,10 @@ struct find_conv_pointwise
...
@@ -84,9 +99,10 @@ struct find_conv_pointwise
i
.
name
());
i
.
name
());
}))
}))
return
;
return
;
// Only fuse with fp32
for now
// Only fuse with fp32
/fp16
if
(
std
::
any_of
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
[
&
](
auto
i
)
{
if
(
std
::
any_of
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
[
&
](
auto
i
)
{
return
i
->
get_shape
().
type
()
!=
shape
::
type_t
::
float_type
;
return
not
contains
({
shape
::
type_t
::
float_type
,
shape
::
type_t
::
half_type
},
i
->
get_shape
().
type
());
}))
}))
return
;
return
;
std
::
sort
(
names
.
begin
(),
names
.
end
());
std
::
sort
(
names
.
begin
(),
names
.
end
());
...
...
src/targets/gpu/fuse_ops.cpp
View file @
5a14c0bf
...
@@ -26,24 +26,9 @@
...
@@ -26,24 +26,9 @@
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/add.hpp>
#include <migraphx/gpu/mul.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/device/layernorm.hpp>
#include <migraphx/gpu/device/gelu.hpp>
#include <migraphx/gpu/device/mul_add.hpp>
#include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add_sigmoid.hpp>
#include <migraphx/gpu/device/add_tanh.hpp>
#include <migraphx/gpu/device/mul_add_relu.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/array.hpp>
#include <migraphx/array.hpp>
...
@@ -204,10 +189,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
...
@@ -204,10 +189,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
return
false
;
return
false
;
auto
wei
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
auto
wei
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
assert
(
wei
.
lens
().
size
()
==
4
);
assert
(
wei
.
lens
().
size
()
==
4
);
auto
conv
=
any_cast
<
miopen_convolution
>
(
ins
->
get_operator
());
auto
miopen_conv_op
=
ins
->
get_operator
().
to_value
();
if
(
conv
.
op
.
group
>
1
)
auto
algo
=
miopen_conv_op
.
at
(
"algo"
).
to
<
miopenConvFwdAlgorithm_t
>
();
auto
conv_op
=
from_value
<
op
::
convolution
>
(
miopen_conv_op
[
"op"
]);
if
(
conv_op
.
group
>
1
)
return
false
;
return
false
;
if
(
wei
.
lens
()[
1
]
>
512
and
conv
.
algo
!=
miopenConvolutionAlgoWinograd
)
if
(
wei
.
lens
()[
1
]
>
512
and
algo
!=
miopenConvolutionAlgoWinograd
)
return
false
;
return
false
;
// Do not fuse non-symmetric input
// Do not fuse non-symmetric input
...
@@ -215,109 +202,14 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
...
@@ -215,109 +202,14 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
if
(
input_lens
[
2
]
!=
input_lens
[
3
]
or
wei
.
lens
()[
2
]
!=
wei
.
lens
()[
3
])
if
(
input_lens
[
2
]
!=
input_lens
[
3
]
or
wei
.
lens
()[
2
]
!=
wei
.
lens
()[
3
])
return
false
;
return
false
;
auto
op
=
conv
.
op
;
// Dont fuse winograd for non-3x3s since there is no fused windograd for those configs
// Dont fuse winograd for non-3x3s since there is no fused windograd for those configs
if
(
conv
.
algo
==
miopenConvolutionAlgoWinograd
and
wei
.
lens
()[
2
]
!=
3
and
wei
.
lens
()[
3
]
!=
3
and
if
(
algo
==
miopenConvolutionAlgoWinograd
and
wei
.
lens
()[
2
]
!=
3
and
wei
.
lens
()[
3
]
!=
3
and
contains
({{
1
,
1
}},
op
.
stride
))
contains
({{
1
,
1
}},
conv_
op
.
stride
))
return
false
;
return
false
;
return
contains
({{
0
,
0
,
0
,
0
},
{
1
,
1
,
1
,
1
},
{
2
,
2
,
2
,
2
}},
op
.
padding
)
and
return
contains
({{
0
,
0
,
0
,
0
},
{
1
,
1
,
1
,
1
},
{
2
,
2
,
2
,
2
}},
conv_
op
.
padding
)
and
contains
({{
0
,
0
},
{
1
,
1
}},
op
.
stride
)
and
contains
({{
1
,
1
}},
op
.
dilation
);
contains
({{
0
,
0
},
{
1
,
1
}},
conv_
op
.
stride
)
and
contains
({{
1
,
1
}},
conv_
op
.
dilation
);
}
}
struct
hip_triadd
:
ternary_device
<
hip_triadd
,
&
device
::
add
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd
)
struct
hip_triadd_clip
:
quinary_device
<
hip_triadd_clip
,
&
device
::
add_clip
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_clip
)
struct
hip_add_clip
:
quaternary_device
<
hip_add_clip
,
&
device
::
add_clip
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_clip
)
struct
hip_triadd_relu
:
ternary_device
<
hip_triadd_relu
,
&
device
::
add_relu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_relu
)
struct
hip_triadd_sigmoid
:
ternary_device
<
hip_triadd_sigmoid
,
&
device
::
add_sigmoid
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_sigmoid
)
struct
hip_triadd_tanh
:
ternary_device
<
hip_triadd_tanh
,
&
device
::
add_tanh
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_tanh
)
struct
hip_add_relu
:
binary_device
<
hip_add_relu
,
&
device
::
add_relu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_relu
)
struct
hip_add_sigmoid
:
binary_device
<
hip_add_relu
,
&
device
::
add_sigmoid
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_sigmoid
)
struct
hip_add_tanh
:
binary_device
<
hip_add_tanh
,
&
device
::
add_tanh
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_tanh
)
struct
hip_layernorm
:
unary_device
<
hip_layernorm
,
&
device
::
layernorm
>
{
// Empty finalize to skip dimension reduction
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{}
};
MIGRAPHX_REGISTER_OP
(
hip_layernorm
)
struct
hip_triadd_layernorm
:
ternary_device
<
hip_triadd_layernorm
,
&
device
::
triadd_layernorm
>
{
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
4
).
standard
();
return
inputs
[
0
];
}
// Empty finalize to skip dimension reduction
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{}
};
MIGRAPHX_REGISTER_OP
(
hip_triadd_layernorm
)
struct
hip_gelu
:
unary_device
<
hip_gelu
,
&
device
::
gelu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_gelu
)
struct
hip_add_gelu
:
binary_device
<
hip_add_gelu
,
&
device
::
add_gelu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_gelu
)
struct
hip_gelu_new
:
unary_device
<
hip_gelu_new
,
&
device
::
gelu_new
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_gelu_new
)
struct
hip_add_gelu_new
:
binary_device
<
hip_add_gelu_new
,
&
device
::
add_gelu_new
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_add_gelu_new
)
struct
hip_mul_add
:
ternary_device
<
hip_mul_add
,
&
device
::
mul_add
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_mul_add
)
struct
hip_mul_add_relu
:
ternary_device
<
hip_mul_add_relu
,
&
device
::
mul_add_relu
>
{
};
MIGRAPHX_REGISTER_OP
(
hip_mul_add_relu
)
void
move_broadcasted_back
(
std
::
vector
<
instruction_ref
>&
args
)
void
move_broadcasted_back
(
std
::
vector
<
instruction_ref
>&
args
)
{
{
// Ensure the last arguments is the broadcasted one
// Ensure the last arguments is the broadcasted one
...
@@ -341,256 +233,6 @@ void move_standard_front(std::vector<instruction_ref>& args)
...
@@ -341,256 +233,6 @@ void move_standard_front(std::vector<instruction_ref>& args)
auto
gpu_name
(
const
std
::
string
&
s
)
{
return
match
::
name
(
"gpu::"
+
s
);
}
auto
gpu_name
(
const
std
::
string
&
s
)
{
return
match
::
name
(
"gpu::"
+
s
);
}
namespace
{
namespace
{
struct
find_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
(
&
gpu_name
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
// We dont fuse for non-standard layouts
if
(
not
x_ins
->
get_shape
().
standard
())
return
;
auto
relements
=
x_ins
->
get_shape
().
lens
().
back
();
if
(
relements
>
1024
or
(
relements
%
4
!=
0
and
relements
>
256
))
return
;
m
.
replace_instruction
(
ins
,
hip_layernorm
{},
x_ins
,
args
.
back
());
}
};
struct
find_triadd_layernorm
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::layernorm"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::triadd"
)(
match
::
used_once
(),
match
::
all_of
[
match
::
inputs
()](
match
::
standard_shape
()))));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
triadd
=
ins
->
inputs
().
front
();
m
.
replace_instruction
(
ins
,
hip_triadd_layernorm
{},
triadd
->
inputs
());
}
};
struct
find_gelu
{
auto
matcher
()
const
{
return
match
::
gelu_erf
(
&
gpu_name
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
m
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
}
};
struct
find_add_gelu
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::gelu"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::add"
).
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_add_gelu
{},
args
);
}
};
struct
find_gelu_new
{
bool
fast_math
=
true
;
auto
matcher
()
const
{
return
match
::
gelu_tanh
(
&
gpu_name
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
if
(
fast_math
)
m
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
else
m
.
replace_instruction
(
ins
,
hip_gelu_new
{},
x_ins
,
args
.
back
());
}
};
struct
find_add_gelu_new
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::gelu_new"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::add"
).
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_add_gelu_new
{},
args
);
}
};
struct
find_add_clip
{
auto
matcher
()
const
{
return
match
::
name
(
std
::
unordered_set
<
std
::
string
>
{
"gpu::clip"
,
"gpu::clipped_relu"
})(
match
::
arg
(
0
)(
match
::
any_of
(
match
::
name
(
"gpu::add"
),
match
::
name
(
"gpu::triadd"
),
match
::
any_of
[
match
::
inputs
()](
match
::
standard_shape
()))
.
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
ins_args
=
ins
->
inputs
();
auto
add_args
=
add_ins
->
inputs
();
move_standard_front
(
add_args
);
move_broadcasted_back
(
add_args
);
// Use the allocation from the clip operator
add_args
.
pop_back
();
add_args
.
insert
(
add_args
.
end
(),
std
::
next
(
ins_args
.
begin
()),
ins_args
.
end
());
if
(
add_ins
->
name
()
==
"gpu::add"
)
m
.
replace_instruction
(
ins
,
hip_add_clip
{},
add_args
);
else
if
(
add_ins
->
name
()
==
"gpu::triadd"
)
m
.
replace_instruction
(
ins
,
hip_triadd_clip
{},
add_args
);
}
};
struct
find_add_unary
{
std
::
string
op_name
;
operation
binary_add_op
;
operation
ternary_add_op
;
auto
matcher
()
const
{
return
match
::
name
(
op_name
)(
match
::
arg
(
0
)(
match
::
used_once
(),
match
::
any_of
(
match
::
name
(
"gpu::add"
),
match
::
name
(
"gpu::triadd"
),
match
::
any_of
(
match
::
name
(
"@literal"
),
match
::
any_of
[
match
::
inputs
()](
match
::
standard_shape
())))
.
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
inputs
().
back
();
if
(
add_ins
->
name
()
==
"gpu::add"
)
m
.
replace_instruction
(
ins
,
binary_add_op
,
args
);
else
if
(
add_ins
->
name
()
==
"gpu::triadd"
)
m
.
replace_instruction
(
ins
,
ternary_add_op
,
args
);
}
};
struct
find_triadd
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"gpu::add"
)(
match
::
used_once
()).
bind
(
"add"
),
match
::
any
(
match
::
any_of
(
match
::
name
(
"@literal"
),
match
::
any_of
[
match
::
inputs
()](
match
::
standard_shape
())))
.
bind
(
"input"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
input_ins
=
r
.
instructions
[
"input"
];
auto
ins
=
r
.
result
;
auto
args
=
add_ins
->
inputs
();
auto
is_broadcasted
=
[](
auto
arg
)
{
return
arg
->
get_shape
().
broadcasted
();
};
if
(
std
::
count_if
(
args
.
begin
(),
args
.
end
(),
is_broadcasted
)
>
2
)
return
;
args
.
insert
(
args
.
begin
(),
input_ins
);
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_triadd
{},
args
);
}
};
struct
find_mul_add
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"gpu::mul"
)(
match
::
used_once
()).
bind
(
"mul"
),
match
::
any
().
bind
(
"b"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
mul_ins
=
r
.
instructions
[
"mul"
];
auto
b_ins
=
r
.
instructions
[
"b"
];
auto
ins
=
r
.
result
;
auto
args
=
mul_ins
->
inputs
();
assert
(
mul_ins
!=
b_ins
);
move_standard_front
(
args
);
move_broadcasted_back
(
args
);
args
.
insert
(
std
::
prev
(
args
.
end
()),
b_ins
);
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_mul_add
{},
args
);
}
};
struct
find_mul_add_relu
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::relu"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::mul_add"
)(
match
::
used_once
()).
bind
(
"mul_add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
mul_add_ins
=
r
.
instructions
[
"mul_add"
];
auto
ins
=
r
.
result
;
auto
args
=
mul_add_ins
->
inputs
();
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
inputs
().
back
();
m
.
replace_instruction
(
ins
,
hip_mul_add_relu
{},
args
);
}
};
struct
miopen_fusion
struct
miopen_fusion
{
{
struct
fuse_op_data
struct
fuse_op_data
...
@@ -820,7 +462,7 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
...
@@ -820,7 +462,7 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
conv_op
=
any_cast
<
miopen_
convolution
>
(
conv_ins
->
get_operator
()).
op
;
auto
conv_op
=
from_value
<
op
::
convolution
>
(
(
conv_ins
->
get_operator
()).
to_value
()[
"op"
])
;
auto
alloc_ins
=
ins
->
inputs
().
back
();
auto
alloc_ins
=
ins
->
inputs
().
back
();
auto
old_ws_ins
=
conv_ins
->
inputs
().
at
(
2
);
auto
old_ws_ins
=
conv_ins
->
inputs
().
at
(
2
);
...
@@ -886,7 +528,7 @@ struct find_conv_pointwise
...
@@ -886,7 +528,7 @@ struct find_conv_pointwise
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
conv_op
=
any_cast
<
miopen_
convolution
>
(
conv_ins
->
get_operator
()
).
op
;
auto
conv_op
=
from_value
<
op
::
convolution
>
(
conv_ins
->
get_operator
()
.
to_value
()[
"op"
])
;
auto
alloc_ins
=
ins
->
inputs
().
back
();
auto
alloc_ins
=
ins
->
inputs
().
back
();
module_ref
pm
=
ins
->
module_inputs
().
front
();
module_ref
pm
=
ins
->
module_inputs
().
front
();
...
@@ -907,46 +549,6 @@ struct find_conv_pointwise
...
@@ -907,46 +549,6 @@ struct find_conv_pointwise
}
}
};
};
struct
find_gemm_add
{
auto
matcher
()
const
{
return
match
::
name
(
"gpu::add"
)(
match
::
all_of
[
match
::
inputs
()](
match
::
standard_shape
()),
match
::
either_arg
(
0
,
1
)(
match
::
used_once
().
bind
(
"c"
),
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
)).
bind
(
"gemm"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
gemm_ins
=
r
.
instructions
[
"gemm"
];
auto
c_ins
=
r
.
instructions
[
"c"
];
auto
gemm
=
any_cast
<
rocblas_gemm
<
op
::
dot
>>
(
gemm_ins
->
get_operator
());
// Already fused gemm
if
(
not
float_equal
(
gemm
.
beta
,
0
))
return
;
auto
inputs
=
gemm_ins
->
inputs
();
inputs
.
pop_back
();
auto
copy_ins
=
c_ins
;
// Insert copy
if
(
ins
==
m
.
end
()
or
c_ins
->
outputs
().
size
()
>
1
or
c_ins
->
inputs
().
empty
())
{
copy_ins
=
m
.
insert_instruction
(
ins
,
hip_copy
{},
c_ins
,
ins
->
inputs
().
back
());
}
inputs
.
push_back
(
copy_ins
);
inputs
.
push_back
(
copy_ins
);
gemm
.
beta
=
1
;
m
.
replace_instruction
(
ins
,
gemm
,
inputs
);
}
};
struct
find_gemm_pointwise
struct
find_gemm_pointwise
{
{
auto
matcher
()
const
auto
matcher
()
const
...
@@ -1185,26 +787,11 @@ struct find_layernorm_pointwise
...
@@ -1185,26 +787,11 @@ struct find_layernorm_pointwise
void
fuse_ops
::
apply
(
module
&
m
)
const
void
fuse_ops
::
apply
(
module
&
m
)
const
{
{
match
::
find_matches
(
m
,
find_contiguous_pointwise
{}
,
find_gelu
{},
find_gelu_new
{
fast_math
}
);
match
::
find_matches
(
m
,
find_contiguous_pointwise
{});
run_passes
(
m
,
{
dead_code_elimination
{}});
run_passes
(
m
,
{
dead_code_elimination
{}});
match
::
find_matches
(
m
,
find_triadd
{});
match
::
find_matches
(
m
,
find_conv_pointwise
{
ctx
},
find_conv_bias_relu
{
ctx
},
find_conv_bias
{
ctx
});
match
::
find_matches
(
m
,
find_layernorm
{},
find_conv_pointwise
{
ctx
},
find_conv_bias_relu
{
ctx
},
find_conv_bias
{
ctx
},
find_add_gelu
{},
find_add_gelu_new
{},
find_mul_add
{},
find_mul_add_relu
{},
find_add_unary
{
"gpu::relu"
,
hip_add_relu
{},
hip_triadd_relu
{}},
find_add_unary
{
"gpu::sigmoid"
,
hip_add_sigmoid
{},
hip_triadd_sigmoid
{}},
find_add_unary
{
"gpu::tanh"
,
hip_add_tanh
{},
hip_triadd_tanh
{}},
find_add_clip
{});
run_passes
(
m
,
{
dead_code_elimination
{}});
run_passes
(
m
,
{
dead_code_elimination
{}});
match
::
find_matches
(
m
,
match
::
find_matches
(
m
,
find_triadd_layernorm
{},
find_gemm_add
{},
find_layernorm_pointwise
{},
find_layernorm_pointwise
{},
find_gemm_pointwise
{},
find_gemm_pointwise
{},
find_contiguous_tranpose_gemm
{},
find_contiguous_tranpose_gemm
{},
...
...
src/targets/gpu/gemm_impl.cpp
View file @
5a14c0bf
...
@@ -176,8 +176,13 @@ void gemm_impl(context& ctx,
...
@@ -176,8 +176,13 @@ void gemm_impl(context& ctx,
auto
num_matrices
=
std
::
accumulate
(
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
if
(
num_matrices
==
1
)
if
(
num_matrices
==
1
or
(
num_matrices
>
1
and
get_batch_stride
(
args
[
1
])
==
0
)
)
{
{
// If the batch dimension of B is broadcasted, then we can
// multiply m by the batch_size and use rocblas_gemm_ex
// instead of rocblas_gemm_strided_batched_ex.
m
*=
num_matrices
;
// the rocblas_gemm API handles inputs and output matrices as
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
...
...
src/targets/gpu/include/migraphx/gpu/acos.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/acos.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_acos
:
unary_device
<
hip_acos
,
device
::
acos
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/acosh.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/acosh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_acosh
:
unary_device
<
hip_acosh
,
device
::
acosh
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/asin.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/asin.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_asin
:
unary_device
<
hip_asin
,
device
::
asin
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/asinh.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/asinh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
hip_asinh
:
unary_device
<
hip_asinh
,
device
::
asinh
>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/atan.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/atan.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// GPU-side atan operator. The type has no members of its own: it derives
// from unary_device, passing itself and the device::atan entry point as the
// template arguments, and inherits everything from that base.
struct hip_atan : unary_device<hip_atan, device::atan>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/atanh.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/atanh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// GPU-side atanh operator. The type has no members of its own: it derives
// from unary_device, passing itself and the device::atanh entry point as the
// template arguments, and inherits everything from that base.
struct hip_atanh : unary_device<hip_atanh, device::atanh>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/batch_norm_inference.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#define MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/reflect.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
// GPU operator that wraps op::batch_norm_inference.
// compute_shape() and compute() are only declared here; their definitions
// live in another translation unit.
struct miopen_batch_norm_inference
{
    // The wrapped reference operator; reflection is forwarded to it.
    op::batch_norm_inference op;

    // Exposes exactly the fields that the wrapped operator reflects.
    template <class Wrapper, class Visitor>
    static auto reflect(Wrapper& self, Visitor f)
    {
        return migraphx::reflect(self.op, f);
    }

    // Name under which this operator is identified.
    std::string name() const
    {
        return "gpu::batch_norm_inference";
    }

    shape compute_shape(const std::vector<shape>& inputs) const;

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    // Returns the index of the last entry of `shapes`.
    // NOTE(review): presumably this marks the trailing argument as the
    // buffer the result aliases - confirm against the operation interface.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last_index = shapes.size() - 1;
        return last_index;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/ceil.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CEIL_HPP
#define MIGRAPHX_GUARD_RTGLIB_CEIL_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/ceil.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// GPU-side ceil operator. The type has no members of its own: it derives
// from unary_device, passing itself and the device::ceil entry point as the
// template arguments, and inherits everything from that base.
struct hip_ceil : unary_device<hip_ceil, device::ceil>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/clip.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
// GPU operator that wraps op::clip.
// compute_shape() and compute() are only declared here; their definitions
// live in another translation unit.
struct hip_clip
{
    // The wrapped reference operator; reflection is forwarded to it.
    op::clip op;

    // Exposes exactly the fields that the wrapped operator reflects.
    template <class Wrapper, class Visitor>
    static auto reflect(Wrapper& self, Visitor f)
    {
        return migraphx::reflect(self.op, f);
    }

    // Name under which this operator is identified.
    std::string name() const
    {
        return "gpu::clip";
    }

    shape compute_shape(std::vector<shape> inputs) const;

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    // Returns the index of the last entry of `shapes`.
    // NOTE(review): presumably this marks the trailing argument as the
    // buffer the result aliases - confirm against the operation interface.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last_index = shapes.size() - 1;
        return last_index;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
View file @
5a14c0bf
...
@@ -36,6 +36,9 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -36,6 +36,9 @@ inline namespace MIGRAPHX_INLINE_NS {
struct
shape
;
struct
shape
;
namespace
gpu
{
namespace
gpu
{
struct
context
;
namespace
gen
{
namespace
gen
{
struct
vectorize
struct
vectorize
...
@@ -43,6 +46,10 @@ struct vectorize
...
@@ -43,6 +46,10 @@ struct vectorize
std
::
size_t
size
=
1
;
std
::
size_t
size
=
1
;
std
::
size_t
axis
=
0
;
std
::
size_t
axis
=
0
;
static
vectorize
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
);
static
vectorize
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
);
static
vectorize
elements
(
context
&
ctx
,
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
);
static
vectorize
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
,
const
std
::
vector
<
std
::
size_t
>&
sizes
);
std
::
string
str
()
const
;
std
::
string
str
()
const
;
};
};
struct
preload
struct
preload
...
...
src/targets/gpu/include/migraphx/gpu/concat.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/concat.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
// GPU operator that wraps op::concat.
// compute_shape() and compute() are only declared here; their definitions
// live in another translation unit.
struct hip_concat
{
    // The wrapped reference operator; reflection is forwarded to it.
    op::concat op;

    // Exposes exactly the fields that the wrapped operator reflects.
    template <class Wrapper, class Visitor>
    static auto reflect(Wrapper& self, Visitor f)
    {
        return migraphx::reflect(self.op, f);
    }

    // Name under which this operator is identified.
    std::string name() const
    {
        return "gpu::concat";
    }

    shape compute_shape(std::vector<shape> inputs) const;

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    // Returns the index of the last entry of `shapes`.
    // NOTE(review): presumably this marks the trailing argument as the
    // buffer the result aliases - confirm against the operation interface.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last_index = shapes.size() - 1;
        return last_index;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
View file @
5a14c0bf
...
@@ -24,8 +24,9 @@
...
@@ -24,8 +24,9 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
#include <migraphx/
gpu
/concat.hpp>
#include <migraphx/
op
/concat.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/serialize.hpp>
namespace
migraphx
{
namespace
migraphx
{
namespace
gpu
{
namespace
gpu
{
...
@@ -36,7 +37,8 @@ struct concat_gpu_optimization
...
@@ -36,7 +37,8 @@ struct concat_gpu_optimization
std
::
string
allocate
()
const
{
return
"hip::allocate"
;
}
std
::
string
allocate
()
const
{
return
"hip::allocate"
;
}
migraphx
::
op
::
concat
get_concat
(
const
migraphx
::
operation
&
op
)
const
migraphx
::
op
::
concat
get_concat
(
const
migraphx
::
operation
&
op
)
const
{
{
return
migraphx
::
any_cast
<
migraphx
::
gpu
::
hip_concat
>
(
op
).
op
;
auto
v
=
op
.
to_value
();
return
from_value
<
migraphx
::
op
::
concat
>
(
v
.
at
(
"op"
));
}
}
};
};
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
5a14c0bf
...
@@ -197,7 +197,9 @@ struct hip_device
...
@@ -197,7 +197,9 @@ struct hip_device
struct
context
struct
context
{
{
context
(
std
::
size_t
device_id
=
0
,
std
::
size_t
n
=
value_of
(
MIGRAPHX_NSTREAMS
{},
1
))
context
(
std
::
size_t
device_id
=
0
,
std
::
size_t
n
=
value_of
(
MIGRAPHX_NSTREAMS
{},
1
))
:
current_device
(
std
::
make_shared
<
hip_device
>
(
device_id
,
n
))
:
current_device
(
std
::
make_shared
<
hip_device
>
(
device_id
,
n
)),
begin_event
(
create_event
()),
finish_event
(
create_event
())
{
{
}
}
...
@@ -274,6 +276,24 @@ struct context
...
@@ -274,6 +276,24 @@ struct context
this
->
current_device
=
std
::
make_shared
<
hip_device
>
(
0
,
n_streams
);
this
->
current_device
=
std
::
make_shared
<
hip_device
>
(
0
,
n_streams
);
}
}
// Records begin_event on the caller-supplied queue (read as a hipStream_t)
// and then passes that event to get_stream().wait().
// NOTE(review): presumably wait() makes this context's stream wait for the
// event - confirm against the stream implementation.
// Throws via MIGRAPHX_THROW when hipEventRecord does not return hipSuccess.
void wait_for(any_ptr queue)
{
    const auto record_status = hipEventRecord(begin_event.get(), queue.get<hipStream_t>());
    if(record_status != hipSuccess)
    {
        MIGRAPHX_THROW("failed to record " + hip_error(record_status));
    }
    get_stream().wait(begin_event.get());
}
// Passes finish_event to get_stream().record() and then asks the
// caller-supplied queue (read as a hipStream_t) to wait on that event.
// Throws via MIGRAPHX_THROW when hipStreamWaitEvent does not return hipSuccess.
void finish_on(any_ptr queue)
{
    get_stream().record(finish_event.get());
    const auto wait_status =
        hipStreamWaitEvent(queue.get<hipStream_t>(), finish_event.get(), 0);
    if(wait_status != hipSuccess)
    {
        MIGRAPHX_THROW("Failed to wait on event " + hip_error(wait_status));
    }
}
any_ptr
get_queue
()
{
return
get_stream
().
get
();
}
any_ptr
get_queue
()
{
return
get_stream
().
get
();
}
void
enable_perf_measurement
(
bool
b
=
true
)
void
enable_perf_measurement
(
bool
b
=
true
)
...
@@ -316,9 +336,13 @@ struct context
...
@@ -316,9 +336,13 @@ struct context
// TODO: Make this a vector to support multiple devices
// TODO: Make this a vector to support multiple devices
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
bool
measure_perf
=
false
;
bool
measure_perf
=
false
;
// for event perf timing
shared
<
hip_event_ptr
>
start_event
=
nullptr
;
shared
<
hip_event_ptr
>
start_event
=
nullptr
;
shared
<
hip_event_ptr
>
stop_event
=
nullptr
;
shared
<
hip_event_ptr
>
stop_event
=
nullptr
;
// for stream synchronization
shared
<
hip_event_ptr
>
begin_event
=
nullptr
;
shared
<
hip_event_ptr
>
finish_event
=
nullptr
;
};
};
inline
void
migraphx_to_value
(
value
&
v
,
const
context
&
ctx
)
{
v
=
ctx
.
to_value
();
}
inline
void
migraphx_to_value
(
value
&
v
,
const
context
&
ctx
)
{
v
=
ctx
.
to_value
();
}
...
...
src/targets/gpu/include/migraphx/gpu/convert.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CONVERT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVERT_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
// GPU operator that wraps op::convert.
// compute_shape() and compute() are only declared here; their definitions
// live in another translation unit.
struct hip_convert
{
    // The wrapped reference operator; reflection is forwarded to it.
    op::convert op;

    // Exposes exactly the fields that the wrapped operator reflects.
    template <class Wrapper, class Visitor>
    static auto reflect(Wrapper& self, Visitor f)
    {
        return migraphx::reflect(self.op, f);
    }

    // Name under which this operator is identified.
    std::string name() const
    {
        return "gpu::convert";
    }

    shape compute_shape(std::vector<shape> inputs) const;

    // The output-shape parameter is deliberately unnamed in this declaration.
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;

    // Returns the index of the last entry of `shapes`.
    // NOTE(review): presumably this marks the trailing argument as the
    // buffer the result aliases - confirm against the operation interface.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last_index = shapes.size() - 1;
        return last_index;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
5a14c0bf
...
@@ -25,45 +25,321 @@
...
@@ -25,45 +25,321 @@
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
// Returns `input` unchanged unless it has exactly three dimensions. In that
// case a new shape of the same type is returned with an extra dimension of
// length 1 inserted at index 2. The new shape is built from type and lengths
// only, so the strides of `input` are not carried over.
inline shape reshape_if_1d(const shape& input)
{
    if(input.lens().size() != 3)
    {
        return input;
    }

    std::vector<size_t> expanded_lens = input.lens();
    expanded_lens.insert(expanded_lens.begin() + 2, 1);
    return shape{input.type(), expanded_lens};
}
template
<
class
Op
>
struct
miopen_convolution
struct
miopen_convolution
{
{
op
::
convolution
op
;
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvAlgorithm_t
algo
{};
miopenConvAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
value
::
binary
solution_object
{};
shared
<
miopen_solution
>
solution_ptr
=
nullptr
;
#endif
uint64_t
solution_id
=
0
;
uint64_t
solution_id
=
0
;
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
{
{
return
pack
(
f
(
self
.
op
.
padding
,
"padding"
),
return
pack
(
f
(
self
.
op
,
"op"
),
f
(
self
.
op
.
stride
,
"stride"
),
#ifdef MIGRAPHX_HAS_FIND_2_API
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
solution_object
,
"solution_object"
),
f
(
self
.
op
.
group
,
"group"
),
#endif
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
f
(
self
.
solution_id
,
"solution_id"
));
}
}
std
::
string
name
()
const
{
return
"gpu::convolution"
;
}
std
::
string
name
()
const
{
return
"gpu::"
+
op
.
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
inline
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
op
}.
has
(
4
).
standard
();
std
::
vector
<
shape
>
conv_inputs
(
inputs
.
begin
(),
inputs
.
begin
()
+
2
);
check_shapes
{
conv_inputs
,
op
}.
max_ndims
(
5
);
return
migraphx
::
compute_shape
<
Op
>
(
op
,
conv_inputs
);
}
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
{
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()),
int8_x4_format
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const
miopenTensorArgument_t
tensor_args
[
3
]
=
{
{
miopenTensorConvolutionX
,
nullptr
,
args
[
0
].
implicit
()},
{
miopenTensorConvolutionW
,
nullptr
,
args
[
1
].
implicit
()},
{
miopenTensorConvolutionY
,
nullptr
,
args
[
3
].
implicit
()},
};
if
(
solution_ptr
.
get
()
==
nullptr
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : Load MIOpen Solution before running it"
);
auto
status
=
miopenRunSolution
(
miopen_stream_handle
,
solution_ptr
.
get
(),
3
,
tensor_args
,
args
[
2
].
implicit
(),
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : running convolution using find_2.0 failed"
);
return
args
[
3
];
}
#else
// else use immediate mode
if
(
solution_id
==
0
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : invalid solution ID"
);
auto
status
=
miopenConvolutionForwardImmediate
(
miopen_stream_handle
,
w_desc
.
get
(),
args
[
1
].
implicit
(),
x_desc
.
get
(),
args
[
0
].
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
workspace_size
,
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": running convolution failed"
);
return
args
[
3
];
#endif
}
// Creates the convolution descriptor `cd` if it has not been created yet.
// make_deconv() is used when the wrapped operator is named "deconvolution",
// make_conv() for every other operator. An existing descriptor is left as is.
inline void set_conv_descriptor()
{
    if(cd == nullptr)
    {
        if(op.name() == "deconvolution")
        {
            cd = make_deconv(op);
        }
        else
        {
            cd = make_conv(op);
        }
    }
}
// Makes sure the convolution descriptor exists, runs find() on the GPU
// context extracted from the type-erased `ctx`, and returns a value with a
// single "workspace" entry holding the byte size of the shape find() returned.
value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
{
    set_conv_descriptor();
    const auto found_workspace = find(any_cast<migraphx::gpu::context>(ctx), output, input);
    return {{"workspace", found_workspace.bytes()}};
}
// Selects a MIOpen solution for this operator and returns the workspace
// shape (int8, one dimension) that the caller must provide.
//
// ctx          - GPU context whose stream supplies the MIOpen handle.
// output_shape - shape of the convolution result.
// inputs       - inputs[0] is the data tensor shape, inputs[1] the weights.
//
// With MIGRAPHX_HAS_FIND_2_API the solution is found through the Find 2.0
// problem API, kept in `solution_ptr`, and serialized into `solution_object`.
// Otherwise the classic find call is run on generated inputs, `algo` is set
// from its result, and `solution_id` is taken from the first solution MIOpen
// reports.
//
// Throws via MIGRAPHX_THROW when any MIOpen call fails, or when MIOpen reports
// no solution at all in immediate mode.
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    std::size_t workspace_size = 0;
#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        auto conv_problem = make_obj<miopen_problem>(
            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);

        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);

        auto* miopen_stream_handle = ctx.get_stream().get_miopen();

        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());

        auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + " : failed to get solution's workspace size");

        // Initialized so that a failed query can never leave garbage behind.
        std::size_t solution_size = 0;
        status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": Failed to fetch solution size");

        // Serialize the solution so it can be reloaded in finalize().
        auto solution_binary = std::vector<char>{};
        solution_binary.resize(solution_size);

        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": Saving solution failed");
        solution_object = value::binary{solution_binary.data(), solution_size};
        return shape{shape::int8_type, {workspace_size}};
    }
#else
    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           &workspace_size);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + " : Failed to get forward workspace size");

    // Only this branch needs a workspace shape, so it is declared here.
    shape workspace_shape{shape::int8_type, {workspace_size}};

    auto x_shape = inputs[0];
    auto w_shape = inputs[1];
    if(int8_x4_format)
    {
        x_shape = pack_int8_shape(x_shape);
        w_shape = pack_int8_shape(w_shape);
    }

    // Generated buffers used only for the find call below.
    auto x         = to_gpu(generate_argument(x_shape));
    auto w         = to_gpu(generate_argument(w_shape));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                   x_desc.get(),
                                                   x.implicit(),
                                                   w_desc.get(),
                                                   w.implicit(),
                                                   cd.get(),
                                                   y_desc.get(),
                                                   y.implicit(),
                                                   1,
                                                   &algo_count,
                                                   &perf,
                                                   workspace.implicit(),
                                                   workspace_size,
                                                   false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + " : find convolution failed");
    algo = perf.fwd_algo;

    size_t solution_count = 0;
    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution count failed");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution failed");

    // front() on an empty vector is undefined behaviour, so fail explicitly.
    if(solution_count == 0 or solutions.empty())
        MIGRAPHX_THROW("MIOpen " + op.name() + ": no solutions found");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
#endif
}
// Prepares the operator for execution.
//
// With MIGRAPHX_HAS_FIND_2_API: when no solution is loaded yet, the solution
// is deserialized from `solution_object`. The three parameters are unused in
// this configuration.
//
// Otherwise (immediate mode): makes sure the convolution descriptor exists.
// When `solution_id` is still 0 it runs find() and throws if the workspace
// find() reports is larger than inputs.at(2). It then asks MIOpen to compile
// the solution identified by `solution_id`.
//
// Throws via MIGRAPHX_THROW when loading or compiling the solution fails.
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        (void)(ctx); // avoid warnings
        (void)(output_shape);
        (void)(inputs);
        // load solution
        if(solution_ptr == nullptr)
        {
            miopenSolution_t ptr = nullptr;
            auto status =
                miopenLoadSolution(&ptr,
                                   reinterpret_cast<const char*>(solution_object.data()),
                                   solution_object.size());
            // Check the status before taking ownership, so a failed load
            // never hands an invalid handle to the owning wrapper.
            if(status != miopenStatusSuccess)
                MIGRAPHX_THROW("MIOpen " + op.name() + ": loading convolution solution failed");
            solution_ptr = miopen_solution{ptr};
        }
    }
#else
    // Use immediate mode API
    {
        set_conv_descriptor();
        if(solution_id == 0)
        {
            // Check that workspace hasn't changed
            auto size = inputs.at(2).bytes();
            auto ws   = find(ctx, output_shape, inputs);
            if(ws.bytes() > size)
                MIGRAPHX_THROW("MIOpen " + op.name() +
                               ": workspace has changed during finalization.");
        }

        auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
        auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
        auto y_desc = make_tensor(reshape_if_1d(output_shape));

        auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                              w_desc.get(),
                                                              x_desc.get(),
                                                              cd.get(),
                                                              y_desc.get(),
                                                              solution_id);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
    }
#endif
}
inline
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
};
// Returns `s` unchanged unless its type is int8. For int8 shapes the length
// of dimension 1 is rounded up to the next multiple of 4, and stride 0 is
// recomputed as stride 1 times that padded length. All other lengths and
// strides are kept.
inline shape pack_int8_shape(const shape& s) const
{
    if(s.type() == shape::int8_type)
    {
        auto padded_lens    = s.lens();
        auto packed_strides = s.strides();
        padded_lens[1]      = (padded_lens[1] + 3) / 4 * 4;
        packed_strides[0]   = packed_strides[1] * padded_lens[1];
        return {s.type(), padded_lens, packed_strides};
    }
    return s;
}
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/cosh.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_COSH_HPP
#define MIGRAPHX_GUARD_RTGLIB_COSH_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/cosh.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// GPU-side cosh operator. The type has no members of its own: it derives
// from unary_device, passing itself and the device::cosh entry point as the
// template arguments, and inherits everything from that base.
struct hip_cosh : unary_device<hip_cosh, device::cosh>
{
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DECONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_DECONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
// GPU operator that wraps op::deconvolution and carries MIOpen state.
// compute_shape(), compute(), find() and finalize() are only declared here;
// their definitions live in another translation unit.
struct miopen_deconvolution
{
    // The wrapped reference operator.
    op::deconvolution op;
    // MIOpen convolution descriptor used by this operator.
    shared<convolution_descriptor> cd;
    // Value-initialized MIOpen algorithm selector.
    miopenConvAlgorithm_t algo{};
    // Starts at 0 and is the only MIOpen-side field exposed by reflect().
    uint64_t solution_id = 0;

    // Reflects every field of the wrapped operator, followed by solution_id.
    template <class Wrapper, class Visitor>
    static auto reflect(Wrapper& self, Visitor f)
    {
        return pack_join(op::deconvolution::reflect(self.op, f),
                         pack(f(self.solution_id, "solution_id")));
    }

    // Name under which this operator is identified.
    std::string name() const
    {
        return "gpu::deconv";
    }

    shape compute_shape(const std::vector<shape>& inputs) const;

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);

    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);

    // Returns the index of the last entry of `shapes`.
    // NOTE(review): presumably this marks the trailing argument as the
    // buffer the result aliases - confirm against the operation interface.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last_index = shapes.size() - 1;
        return last_index;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
Prev
1
…
3
4
5
6
7
8
9
10
11
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment