MIGraphX — commit 31065c7d
"IMG/cpio/vscode:/vscode.git/clone" did not exist on "879a7592bdd58d1788e952efde9895fc48a3484e"
Authored Oct 31, 2022 by charlie

Merge branch 'dyn_squeeze' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_model_test

Parents: 6bec381f, 6acbd4e4

Changes: 482 files in the merge; this page shows 20 changed files with 649 additions and 254 deletions (+649 / -254).
Files on this page:

src/targets/gpu/include/migraphx/gpu/tanh.hpp                            +0    -42
src/targets/gpu/include/migraphx/gpu/topk.hpp                            +1    -1
src/targets/gpu/include/migraphx/gpu/unary_not.hpp                       +0    -43
src/targets/gpu/jit/concat.cpp                                           +98   -0
src/targets/gpu/jit/gathernd.cpp                                         +1    -8
src/targets/gpu/jit/layernorm.cpp                                        +131  -0
src/targets/gpu/jit/mlir.cpp                                             +1    -1
src/targets/gpu/jit/pad.cpp                                              +100  -0
src/targets/gpu/jit/pointwise.cpp                                        +11   -56
src/targets/gpu/jit/reduce.cpp                                           +1    -9
src/targets/gpu/jit/roialign.cpp                                         +0    -9
src/targets/gpu/jit/scatternd.cpp                                        +4    -11
src/targets/gpu/jit/softmax.cpp                                          +6    -9
src/targets/gpu/kernel.cpp                                               +30   -7
src/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp           +1    -1
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp               +111  -47
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp              +66   -0
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp          +3    -2
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp               +81   -5
src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp   +3    -3

src/targets/gpu/include/migraphx/gpu/tanh.hpp (deleted, 100644 → 0)

/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_TANH_HPP

#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/tanh.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct hip_tanh : unary_device<hip_tanh, device::tanh>
{
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif

src/targets/gpu/include/migraphx/gpu/topk.hpp

@@ -27,7 +27,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/reflect.hpp>
 #include <migraphx/op/topk.hpp>
-#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

src/targets/gpu/include/migraphx/gpu/unary_not.hpp (deleted, 100644 → 0)

/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_UNARY_NOT_HPP
#define MIGRAPHX_GUARD_RTGLIB_UNARY_NOT_HPP

#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/unary_not.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct hip_unary_not : unary_device<hip_unary_not, device::unary_not>
{
    std::string name() const { return "gpu::not"; }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif

src/targets/gpu/include/migraphx/gpu/softmax.hpp → src/targets/gpu/jit/concat.cpp

The miopen-based hip_softmax operator header is removed (its op::softmax member, reflect(), name() returning "gpu::softmax", the compute_shape/compute/output_alias declarations, the struct context forward declaration, and the miopen/rocblas/device includes and include guard). The file is replaced by a JIT compiler for concat:

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

// NOLINTNEXTLINE
static const char* const concat_kernel = R"__migraphx__(
#include <migraphx/kernels/concat.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <args.hpp>

namespace migraphx {

extern "C" {
__global__ void ${kernel}(${params})
{
    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
        concat<${axis}>(y, xs...);
    });
}
}

} // namespace migraphx

)__migraphx__";

struct concat_compiler : compiler<concat_compiler>
{
    std::vector<std::string> names() const { return {"concat"}; }

    static std::size_t get_concat_elements(const std::vector<shape>& inputs)
    {
        return inputs.back().elements() / (inputs.size() - 1);
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        // TODO: Use reduce_dims
        hip_compile_options options;
        options.inputs      = inputs;
        options.output      = inputs.back();
        options.params      = "-Wno-float-equal";
        auto axis           = find_fast_axis(options.inputs);
        auto vec            = vectorize::elements(ctx, axis, options.inputs);
        options.kernel_name = v.get("kernel", "concat_kernel");
        options.set_launch_params(
            v, compute_global_for(ctx, get_concat_elements(options.inputs) / vec.size, 256));
        auto src = interpolate_string(concat_kernel,
                                      {{"kernel", options.kernel_name},
                                       {"params", enum_params(inputs.size(), "void * private_p")},
                                       {"args", enum_params(inputs.size(), "private_p")},
                                       {"transformers", make_transformer_args(vec)},
                                       {"axis", v.at("axis").to<std::string>()}});
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
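
A note on the launch sizing above: the compiler receives every input shape plus the output shape (last in the list), so get_concat_elements divides the output's element count by the number of actual inputs. A standalone sketch with hypothetical shapes, not part of the commit:

// Standalone sketch (not from the commit) of how get_concat_elements sizes the launch:
// the shape list holds all inputs followed by the output, so dividing the output
// element count by (inputs.size() - 1) gives the average per-input element count
// that compute_global_for then turns into a global launch size.
#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    // Hypothetical concat on axis 1: {2,3} and {2,5} -> output {2,8}
    std::vector<std::size_t> elements = {2 * 3, 2 * 5, 2 * 8}; // two inputs, then the output
    std::size_t concat_elements       = elements.back() / (elements.size() - 1);
    std::printf("elements per launch slice: %zu\n", concat_elements); // 16 / 2 = 8
    return 0;
}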

src/targets/gpu/jit/gathernd.cpp

@@ -27,13 +27,6 @@
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
-#include <migraphx/ranges.hpp>
-#include <migraphx/reduce_dims.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -72,7 +65,7 @@ struct gathernd_compiler : compiler<gathernd_compiler>
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         hip_compile_options options;
-        auto out_s = inputs.back();
+        const auto& out_s = inputs.back();
         options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
         options.inputs = inputs;
         options.output = out_s;

src/targets/gpu/jit/layernorm.cpp (new file, 0 → 100644)

/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

static const char* const layernorm_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/layernorm.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/preload.hpp>
#include <args.hpp>

namespace migraphx {

${preamble}

extern "C" {
__global__ void ${kernel}(${params})
{
    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
        ${layernorm}<${axis}>(${post}, ${eps}, xs...);
    });
}
}

} // namespace migraphx

)__migraphx__";

struct layernorm_compiler : compiler<layernorm_compiler>
{
    std::vector<std::string> names() const
    {
        return {"layernorm", "gpu::prelayernorm", "gpu::preadd_layernorm"};
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        // TODO: Use reduce_dims
        auto axis  = inputs.front().lens().size() - 1;
        auto faxis = find_fast_axis({inputs.front()});
        vectorize vec{};
        // Vectorize if the axis is a reduction axis
        if(axis == faxis)
        {
            vec = vectorize::elements(ctx, faxis, inputs);
        }
        auto relements  = inputs[0].lens()[axis] / vec.size;
        auto nelements  = (inputs.back().elements() / inputs[0].lens()[axis]);
        auto block_size = compute_block_size(relements, 256);
        hip_compile_options options;
        options.set_launch_params(
            v, compute_global_for(ctx, nelements * block_size, 256), block_size);
        options.output      = inputs.back();
        options.inputs      = inputs;
        options.kernel_name = v.get("kernel", "layernorm_kernel");

        auto eps = v.get("epsilon", 1e-12f);

        auto src = interpolate_string(layernorm_kernel,
                                      {{"kernel", options.kernel_name},
                                       {"params", enum_params(inputs.size(), "void * private_p")},
                                       {"args", enum_params(inputs.size(), "private_p")},
                                       {"transformers", make_transformer_args(vec)},
                                       {"post", v.get("post", std::string{"op::id{}"})},
                                       {"preamble", v.get("preamble", std::string{})},
                                       {"layernorm", v.get("layernorm", std::string{"layernorm"})},
                                       {"axis", to_string(axis)},
                                       {"eps", to_string(eps)}});

        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        auto v         = op.to_value();
        v["layernorm"] = "layernorm";
        v["kernel"]    = "layernorm_kernel";
        if(op.name() == "gpu::preadd_layernorm")
        {
            v["layernorm"] = "add_layernorm";
            v["kernel"]    = "add_layernorm_kernel";
        }
        if(not ins->module_inputs().empty())
        {
            auto* pm      = ins->module_inputs().front();
            v["preamble"] = generate_pointwise(*pm, "post_layernorm");
            v["post"]     = "MIGRAPHX_LIFT(post_layernorm)";
            v["kernel"]   = v["layernorm"].to<std::string>() + "_" +
                            generate_name_from_ops(*pm) + "_kernel";
        }
        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
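
For orientation, the kernel-name composition in layernorm_compiler::compile concatenates the selected layernorm variant with a name derived from the fused pointwise module. A standalone sketch, assuming (hypothetically) that generate_name_from_ops joins the module's operator names with underscores; the op list below is made up:

// Standalone sketch (not from the commit) of the kernel-name composition above.
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::string layernorm        = "add_layernorm";       // gpu::preadd_layernorm case
    std::vector<std::string> ops = {"mul", "add"};        // hypothetical post-layernorm ops
    std::string joined;
    for(const auto& op : ops)
        joined += (joined.empty() ? "" : "_") + op;       // assumed behaviour of generate_name_from_ops
    std::string kernel = layernorm + "_" + joined + "_kernel";
    std::printf("%s\n", kernel.c_str());                  // add_layernorm_mul_add_kernel
    return 0;
}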

src/targets/gpu/jit/mlir.cpp

@@ -41,7 +41,7 @@ struct mlir_compiler : compiler<mlir_compiler>
     {
         auto* smod = ins->module_inputs().front();
         assert(smod->get_parameter_names().size() == ins->inputs().size() - 1);
-        return insert(compile_mlir(ctx, *smod));
+        return insert(compile_mlir(ctx, *smod, ins->inputs()));
     }

     compiler_replace insert(code_object_op co) const

src/targets/gpu/batch_norm_inference.cpp → src/targets/gpu/jit/pad.cpp

The miopen-based miopen_batch_norm_inference implementation is removed (its compute_shape with the six-input / max-five-dims shape checks, the reshape_to_2d helper, and the compute method that called miopenBatchNormalizationForwardInference and returned args[5]). The file is replaced by a JIT compiler for pad:

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/float_equal.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/pad.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>

namespace migraphx {

extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p)
{
    auto offsets = index_ints<${offsets}>{};
    auto idx     = make_index();
    make_tensors()(input_p, output_p)([&](auto input, auto output) {
        pad(idx, offsets, input, output, ${pad_val});
    });
}
}

} // namespace migraphx

)__migraphx__";

struct pad_compiler : compiler<pad_compiler>
{
    std::vector<std::string> names() const { return {"pad"}; }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        options.inputs         = inputs;
        options.output         = inputs.back();
        options.virtual_inputs = reduce_dims(inputs);
        options.kernel_name    = "pad_kernel";
        options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));

        auto pad_val        = v.get("value", 0.f);
        auto pad_val_string = to_string(pad_val);
        if(float_equal(pad_val, std::numeric_limits<float>::lowest()))
            pad_val_string = "lowest{}";
        if(float_equal(pad_val, std::numeric_limits<float>::max()))
            pad_val_string = "highest{}";

        auto padding    = v.at("pads").to_vector<int64_t>();
        auto input_lens = inputs.front().lens();
        std::vector<size_t> offsets(input_lens.size());
        std::copy(padding.begin(), padding.begin() + offsets.size(), offsets.begin());

        auto src = interpolate_string(pointwise_kernel,
                                      {{"pad_val", to_string(pad_val_string)},
                                       {"offsets", to_string_range(offsets)}});
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
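
The offsets computation above copies only the leading ndim entries of the "pads" attribute, i.e. the per-axis "before" padding; the trailing "after" entries only grow the output shape and need no write offset. A standalone sketch with hypothetical values, not part of the commit:

// Standalone sketch (not from the commit) of how pad_compiler derives per-dimension
// offsets from an ONNX-style pads vector {before_0..before_n, after_0..after_n}.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<int64_t> padding        = {0, 0, 1, 1, 0, 0, 1, 1}; // hypothetical pads
    std::vector<std::size_t> input_lens = {1, 3, 32, 32};           // hypothetical NCHW input

    std::vector<std::size_t> offsets(input_lens.size());
    // Same copy as in compile_op: only the "before" half becomes offsets.
    std::copy(padding.begin(), padding.begin() + offsets.size(), offsets.begin());

    for(auto o : offsets)
        std::printf("%zu ", o); // 0 0 1 1
    std::printf("\n");
    return 0;
}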

src/targets/gpu/jit/pointwise.cpp

@@ -26,16 +26,7 @@
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/compile_gen.hpp>
-#include <migraphx/cpp_generator.hpp>
-#include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
-#include <migraphx/permutation.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -65,18 +56,6 @@ __global__ void ${kernel}(${params})
 )__migraphx__";

-static std::vector<std::string> get_op_names(const module& m)
-{
-    std::vector<std::string> result;
-    for(auto& ins : m)
-    {
-        if(starts_with(ins.name(), "@"))
-            continue;
-        result.push_back(ins.name());
-    }
-    return result;
-}
-
 struct pointwise_compiler : compiler<pointwise_compiler>
 {
     std::vector<std::string> names() const { return {"pointwise", "contiguous"}; }

@@ -96,20 +75,16 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         options.virtual_inputs = reduce_dims(inputs);
         options.params         = "-Wno-float-equal";
         auto axis = find_fast_axis(options.virtual_inputs);
-        auto vec      = vectorize::elements(axis, options.virtual_inputs);
-        auto preloads = preload::broadcasts(axis, options.virtual_inputs);
+        auto vec  = vectorize::elements(ctx, axis, options.virtual_inputs);
         options.kernel_name = v.get("kernel", "kernel");
         options.set_launch_params(
-            v,
-            compute_global_for(ctx,
-                               options.output.elements() / vec.size,
-                               oversubscribe_if(not preloads.is_preloading())));
+            v, compute_global_for(ctx, options.output.elements() / vec.size, 256));
         auto src = interpolate_string(pointwise_kernel,
                                       {{"kernel", options.kernel_name},
                                        {"params", enum_params(inputs.size(), "void * private_p")},
                                        {"args", enum_params(inputs.size(), "private_p")},
                                        {"lambda", v.at("lambda").to<std::string>()},
-                                       {"transformers", make_transformer_args(preloads, vec)},
+                                       {"transformers", make_transformer_args(vec)},
                                        {"preamble", v.get("preamble", std::string{})}});
         return compile_hip_code_object(src, options);
     }

@@ -126,34 +101,14 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         else
         {
             assert(not ins->module_inputs().empty());
             auto* pm = ins->module_inputs().front();
-            run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
-            cpp_generator g;
-            g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
-            g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
-            g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
-            g.add_point_op("sign",
-                           "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
-            g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
-            g.add_point_op("less", "migraphx::abs(${0} < ${1})");
-            g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
-            g.add_point_op("not", "migraphx::abs(not ${0})");
-            g.add_point_op("mod", "migraphx::mod(${0}, ${1})");
-            g.add_point_op("fmod", "migraphx::fmod(${0}, ${1})");
-            // Add explict conversions
-            g.fresult(
-                [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
-            auto name = g.create_function(
-                g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
-            std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
-            auto op_names      = get_op_names(*pm);
-            op_names.push_back("kernel");
-            auto op_name_string = join_strings(op_names, "_");
+            auto pf            = generate_pointwise(*pm, "inner_pointwise");
+            std::string lambda = "MIGRAPHX_LIFT(inner_pointwise)";
+            auto kernel_name   = generate_name_from_ops(*pm) + "_kernel";
             return replace(compile_op(ctx,
                                       to_shapes(ins->inputs()),
-                                      {{"lambda", lambda}, {"preamble", g.str()}, {"kernel", op_name_string}}));
+                                      {{"lambda", lambda}, {"preamble", pf}, {"kernel", kernel_name}}));
         }
     }
 };

src/targets/gpu/jit/reduce.cpp

@@ -26,15 +26,7 @@
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/compile_gen.hpp>
-#include <migraphx/cpp_generator.hpp>
-#include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -129,7 +121,7 @@ struct reduce_compiler : compiler<reduce_compiler>
         // Vectorize if the axis is a reduction axis
         if(options.virtual_inputs.back().lens()[faxis] == 1)
         {
-            vec = vectorize::elements(faxis, options.virtual_inputs);
+            vec = vectorize::elements(ctx, faxis, options.virtual_inputs);
         }
         auto relements = get_reduce_elements(options.virtual_inputs) / vec.size;
         auto nelements = options.virtual_inputs.back().elements();

src/targets/gpu/jit/roialign.cpp

@@ -24,16 +24,7 @@
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
-#include <migraphx/cpp_generator.hpp>
-#include <migraphx/ranges.hpp>
-#include <migraphx/reduce_dims.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

src/targets/gpu/jit/scatternd.cpp

@@ -24,16 +24,8 @@
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
-#include <migraphx/ranges.hpp>
-#include <migraphx/reduce_dims.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -87,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
     {
         assert(starts_with(op.name(), "scatternd_"));
         auto reduction = op.name().substr(10);
-        return insert(compile_op(ctx,
-                                 to_shapes({ins->inputs().begin() + 1, ins->inputs().end()}),
-                                 {{"reduction", reduction}}));
+        return insert(compile_op(
+            ctx,
+            to_shapes(std::vector<instruction_ref>{ins->inputs().begin() + 1, ins->inputs().end()}),
+            {{"reduction", reduction}}));
     }

     compiler_replace insert(const operation& op) const

src/targets/gpu/jit/softmax.cpp

@@ -26,20 +26,14 @@
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/compile_gen.hpp>
-#include <migraphx/cpp_generator.hpp>
-#include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_USE_FAST_SOFTMAX)
+
 using namespace migraphx::gpu::gen; // NOLINT

 static const char* const softmax_kernel = R"__migraphx__(

@@ -77,7 +71,7 @@ struct softmax_compiler : compiler<softmax_compiler>
         // Vectorize if the axis is a reduction axis
         if(faxis == axis)
         {
-            vec = vectorize::elements(faxis, inputs);
+            vec = vectorize::elements(ctx, faxis, inputs);
         }
         auto relements = inputs[0].lens()[axis] / vec.size;
         auto nelements = (inputs.back().elements() / inputs[0].lens()[axis]);

@@ -89,6 +83,9 @@ struct softmax_compiler : compiler<softmax_compiler>
         options.inputs      = inputs;
         options.kernel_name = "softmax_kernel";

+        if(enabled(MIGRAPHX_USE_FAST_SOFTMAX{}))
+            options.params = "-DMIGRAPHX_USE_FAST_SOFTMAX";
+
         auto src = interpolate_string(softmax_kernel,
                                       {{"transformers", make_transformer_args(vec)},
                                        {"axis", to_string(axis)}});

src/targets/gpu/kernel.cpp

@@ -80,7 +80,9 @@ void launch_kernel(hipFunction_t fun,
                    std::size_t global,
                    std::size_t local,
                    void* kernargs,
-                   std::size_t size)
+                   std::size_t size,
+                   hipEvent_t start,
+                   hipEvent_t stop)
 {
     assert(global > 0);
     assert(local > 0);

@@ -97,34 +99,55 @@ void launch_kernel(hipFunction_t fun,
 #endif
     };
     auto status = hipExtModuleLaunchKernel(fun,
-                                           global, 1, 1, local, 1, 1, 0, stream, nullptr,
-                                           reinterpret_cast<void**>(&config));
+                                           global, 1, 1, local, 1, 1, 0, stream, nullptr,
+                                           reinterpret_cast<void**>(&config),
+                                           start,
+                                           stop);
     if(status != hipSuccess)
         MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status));
+    if(stop != nullptr)
+    {
+        status = hipEventSynchronize(stop);
+        if(status != hipSuccess)
+            MIGRAPHX_THROW("Failed to sync event: " + hip_error(status));
+    }
 }

 void kernel::launch(hipStream_t stream,
                     std::size_t global,
                     std::size_t local,
-                    std::vector<void*> args) const
+                    std::vector<void*> args,
+                    hipEvent_t start,
+                    hipEvent_t stop) const
 {
     assert(impl != nullptr);
     void* kernargs   = args.data();
     std::size_t size = args.size() * sizeof(void*);
-    launch_kernel(impl->fun, stream, global, local, kernargs, size);
+    launch_kernel(impl->fun, stream, global, local, kernargs, size, start, stop);
 }

 void kernel::launch(hipStream_t stream,
                     std::size_t global,
                     std::size_t local,
-                    const std::vector<kernel_argument>& args) const
+                    const std::vector<kernel_argument>& args,
+                    hipEvent_t start,
+                    hipEvent_t stop) const
 {
     assert(impl != nullptr);
     std::vector<char> kernargs = pack_args(args);
     std::size_t size           = kernargs.size();
-    launch_kernel(impl->fun, stream, global, local, kernargs.data(), size);
+    launch_kernel(impl->fun, stream, global, local, kernargs.data(), size, start, stop);
 }

 } // namespace gpu
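
A minimal sketch (not from the commit) of how a caller could time a launch through the new start/stop parameters. The events are recorded by hipExtModuleLaunchKernel itself, and launch_kernel already synchronizes on stop when it is non-null, so the caller only creates the events and reads the elapsed time; the kernel object and its arguments are assumed to already exist, and error checking is omitted.

// Hedged usage sketch; `k` is an already-loaded migraphx::gpu::kernel.
#include <hip/hip_runtime.h>
#include <migraphx/gpu/kernel.hpp>
#include <cstdio>
#include <vector>

void timed_launch(const migraphx::gpu::kernel& k,
                  hipStream_t stream,
                  std::size_t global,
                  std::size_t local,
                  std::vector<void*> args)
{
    hipEvent_t start = nullptr;
    hipEvent_t stop  = nullptr;
    hipEventCreate(&start);
    hipEventCreate(&stop);

    // The events are recorded around the kernel by the runtime; launch_kernel
    // waits on `stop` before returning when it is non-null.
    k.launch(stream, global, local, std::move(args), start, stop);

    float ms = 0.0f;
    hipEventElapsedTime(&ms, start, stop);
    std::printf("kernel time: %f ms\n", ms);

    hipEventDestroy(start);
    hipEventDestroy(stop);
}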

src/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp

@@ -163,7 +163,7 @@ constexpr Iterator1 search(Iterator1 first, Iterator1 last, Iterator2 s_first, I
         {
             return last;
         }
-        if(!(*it == *s_it))
+        if(not(*it == *s_it))
         {
             break;
         }

src/targets/gpu/kernels/include/migraphx/kernels/array.hpp

@@ -33,49 +33,95 @@
The MIGRAPHX_DEVICE_ARRAY_OP macro previously expanded each operator to an explicit
for(index_int i = 0; i < N; i++) loop over d[i]. It now dispatches through a new
array_detail::array_for_each helper so that small arrays can be updated as hardware
vectors, and the array struct gains a value_type alias. The new code:

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op)                                              \
    template <class U>                                                                       \
    constexpr array& operator op(const array<U, N>& x)                                       \
    {                                                                                        \
        array_detail::array_for_each(*this, x)([](auto& sy, auto sx) { sy op sx; });         \
        return *this;                                                                        \
    }                                                                                        \
    template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})>                            \
    constexpr array& operator op(const U& x)                                                 \
    {                                                                                        \
        array_detail::array_for_each(*this)([&](auto& sy) { sy op x; });                     \
        return *this;                                                                        \
    }                                                                                        \
    template <class U>                                                                       \
    friend constexpr auto operator binary_op(const array& x, const array<U, N>& y)           \
    {                                                                                        \
        array<decltype(T{} binary_op U{}), N> z{};                                           \
        array_detail::array_for_each(z, x, y)(                                               \
            [&](auto& sz, auto sx, auto sy) { sz = sx binary_op sy; });                      \
        return z;                                                                            \
    }                                                                                        \
    template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})>                            \
    friend constexpr auto operator binary_op(const array& x, const U& y)                     \
    {                                                                                        \
        array<decltype(T{} binary_op U{}), N> z{};                                           \
        array_detail::array_for_each(z, x)([&](auto& sz, auto sx) { sz = sx binary_op y; }); \
        return z;                                                                            \
    }                                                                                        \
    template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})>                            \
    friend constexpr auto operator binary_op(const U& x, const array& y)                     \
    {                                                                                        \
        array<decltype(T{} binary_op U{}), N> z{};                                           \
        array_detail::array_for_each(z, y)([&](auto& sz, auto sy) { sz = x binary_op sy; }); \
        return z;                                                                            \
    }

namespace array_detail {

template <class T>
constexpr auto is_vectorizable()
{
    return not is_same<T, bool>{} and (is_fundamental<T>{} or is_same<T, half>{});
}

template <class T>
__device__ auto& array2vec(T& x)
{
    using value_type    = typename T::value_type;
    constexpr auto size = decltype(x.size()){};
    using type          = vec<value_type, size>;
    if constexpr(is_const<T>{})
        return reinterpret_cast<const type&>(x);
    else
        return reinterpret_cast<type&>(x);
}

template <class T, class... Ts>
constexpr auto array_for_each(T& x, Ts&... xs)
{
    MIGRAPHX_ASSERT(((x.size() == xs.size()) and ...));
    return [&](auto f) {
        constexpr auto size = decltype(x.size()){};
        if constexpr((is_vectorizable<typename T::value_type>() or
                      (is_vectorizable<typename Ts::value_type>() or ...)) and
                     size <= 8 and size > 1 and (size % 2 == 0))
        {
            if(__builtin_is_constant_evaluated())
            {
                for(index_int i = 0; i < size; i++)
                    f(x[i], xs[i]...);
            }
            else
            {
                using vec_type = std::remove_reference_t<decltype(array2vec(x))>;
                f(array2vec(x), __builtin_convertvector(array2vec(xs), vec_type)...);
            }
        }
        else
        {
            for(index_int i = 0; i < size; i++)
                f(x[i], xs[i]...);
        }
    };
}

} // namespace array_detail

template <class T, index_int N>
struct array
{
+   using value_type = T;
    T d[N];
    constexpr T& operator[](index_int i)
    {

@@ -108,18 +154,13 @@ struct array
     constexpr T dot(const array& x) const
     {
-        T result = 0;
-        for(index_int i = 0; i < N; i++)
-            result += x[i] * d[i];
-        return result;
+        auto r = x * (*this);
+        return r.reduce([](auto a, auto b) { return a + b; }, 0);
     }

     constexpr T product() const
     {
-        T result = 1;
-        for(index_int i = 0; i < N; i++)
-            result *= d[i];
-        return result;
+        return reduce([](auto x, auto y) { return x * y; }, 1);
     }

     constexpr T single(index_int width = 100) const

@@ -134,6 +175,24 @@ struct array
         return result;
     }

+    template <class F>
+    constexpr auto apply(F f) const
+    {
+        array<decltype(f(d[0])), N> result;
+        for(index_int i = 0; i < N; i++)
+            result[i] = f(d[i]);
+        return result;
+    }
+
+    template <class F>
+    constexpr auto reduce(F f, T init) const
+    {
+        T result = init;
+        for(index_int i = 0; i < N; i++)
+            result = f(result, d[i]);
+        return result;
+    }
+
     MIGRAPHX_DEVICE_ARRAY_OP(+=, +)
     MIGRAPHX_DEVICE_ARRAY_OP(-=, -)
     MIGRAPHX_DEVICE_ARRAY_OP(*=, *)

@@ -153,7 +212,7 @@ struct array
         return true;
     }
-    friend constexpr bool operator!=(const array& x, const array& y) { return !(x == y); }
+    friend constexpr bool operator!=(const array& x, const array& y) { return not(x == y); }
     // This uses the product order rather than lexical order
     friend constexpr bool operator<(const array& x, const array& y)
     {

@@ -201,6 +260,11 @@ struct array
     }
 };

+template <class T, class... Ts>
+constexpr array<T, sizeof...(Ts) + 1> make_array(T x, Ts... xs)
+{
+    return {x, static_cast<T>(xs)...};
+}
+
 template <class T, T... Xs>
 struct integral_const_array : array<T, sizeof...(Xs)>
 {
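
The mechanism array_for_each relies on is reinterpreting a small, evenly sized array as a compiler vector type so one statement updates all lanes. A standalone host-side illustration (not MIGraphX code) using clang's vector extensions, which HIP's compiler also understands; the types and values here are made up:

// Compile with clang. Shows the same shape of update MIGRAPHX_DEVICE_ARRAY_OP(+=, +)
// now performs: convert the other operand's lanes and add them in one vector op.
#include <cstdio>

using float4 = float __attribute__((ext_vector_type(4)));
using int4   = int __attribute__((ext_vector_type(4)));

int main()
{
    alignas(16) float y[4] = {1, 2, 3, 4};
    alignas(16) int x[4]   = {10, 20, 30, 40};

    auto& yv = reinterpret_cast<float4&>(y);
    auto& xv = reinterpret_cast<int4&>(x);

    // One vectorized statement instead of a per-element loop.
    yv += __builtin_convertvector(xv, float4);

    for(float v : y)
        std::printf("%g ", v); // 11 22 33 44
    std::printf("\n");
    return 0;
}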

src/targets/gpu/include/migraphx/gpu/max.hpp → src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp

The hip_max operator header (struct hip_max : binary_device<hip_max, device::max>, with its oper/device includes and include guard) is removed. The file is replaced by the device-side concat kernel header:

#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/tensor_view.hpp>

#ifndef MIGRAPHX_GUARD_KERNELS_CONCAT_HPP
#define MIGRAPHX_GUARD_KERNELS_CONCAT_HPP

namespace migraphx {

template <index_int Axis, class Output, class Input, class Start>
constexpr auto concat_slice(Output out, Input, Start)
{
    constexpr auto lens    = get_shape_c<Input>{}.lens;
    constexpr auto strides = get_shape_c<Output>{}.strides;
    constexpr auto offset  = return_c([] {
        constexpr auto output_shape = get_shape_c<Output>{};
        return Start{} * output_shape.strides[Axis];
    });
    constexpr auto s = make_shape(lens, strides);
    return make_tensor_view(&out[offset], s);
}

template <index_int Axis, class Input>
constexpr auto concat_ends(Input)
{
    constexpr auto lens = get_shape_c<Input>{}.lens;
    return _c<lens[Axis]>;
}

template <index_int Axis, class Output, class... Inputs>
__device__ void concat(Output output, Inputs... inputs)
{
    auto idx = make_index();
    fold([&](auto start, auto input) {
        auto y = concat_slice<Axis>(output, input, start);
        idx.global_stride(input.get_shape().elements(), [&](auto i) { y[i] = input[i]; });
        return start + concat_ends<Axis>(input);
    })(_c<0>, inputs...);
}

} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_CONCAT_HPP
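
The offset arithmetic above is simple: each input is written into the output starting at start * output_stride[Axis], and the running start advances by that input's extent along the axis (concat_ends). A standalone host-side sketch (not MIGraphX code) with made-up shapes:

// Concatenate a 2x2 and a 2x3 matrix along axis 1 into a 2x5 output (row-major),
// mirroring concat_slice (offset = start * stride[axis]) and the fold over inputs.
#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> a = {1, 2, 3, 4};        // lens {2,2}, strides {2,1}
    std::vector<int> b = {5, 6, 7, 8, 9, 10}; // lens {2,3}, strides {3,1}
    std::vector<int> out(2 * 5, 0);           // lens {2,5}, strides {5,1}

    const int out_stride[2] = {5, 1};
    int start               = 0;

    auto copy_in = [&](const std::vector<int>& in, const int lens[2], const int strides[2]) {
        int offset = start * out_stride[1]; // start * output stride along the concat axis
        for(int i = 0; i < lens[0]; i++)
            for(int j = 0; j < lens[1]; j++)
                out[offset + i * out_stride[0] + j * out_stride[1]] =
                    in[i * strides[0] + j * strides[1]];
        start += lens[1]; // advance by this input's extent along the axis (concat_ends)
    };

    const int a_lens[2] = {2, 2}, a_strides[2] = {2, 1};
    const int b_lens[2] = {2, 3}, b_strides[2] = {3, 1};
    copy_in(a, a_lens, a_strides);
    copy_in(b, b_lens, b_strides);

    for(int v : out)
        std::printf("%d ", v); // 1 2 5 6 7 3 4 8 9 10
    std::printf("\n");
    return 0;
}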

src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp

@@ -31,8 +31,9 @@
     ->decltype(__VA_ARGS__) { return __VA_ARGS__; }

 // NOLINTNEXTLINE
 #define MIGRAPHX_LIFT(...) \
-    [](auto&&... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(xs)>(xs)...))
+    [](auto&&... private_lisft_xs) MIGRAPHX_RETURNS( \
+        (__VA_ARGS__)(static_cast<decltype(private_lisft_xs)>(private_lisft_xs)...))

 namespace migraphx {
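
The unusual parameter name makes the lifted lambda's pack unlikely to collide with a name in the surrounding code: MIGRAPHX_LIFT is often expanded inside generated kernels whose own lambdas also call their pack xs, and the old name shadowed it. A minimal standalone illustration (not MIGraphX code) of the hazard; LIFT_OLD mirrors the old macro shape with a plain return instead of MIGRAPHX_RETURNS:

#include <cstdio>

#define LIFT_OLD(...) \
    [](auto&&... xs) { return (__VA_ARGS__)(static_cast<decltype(xs)>(xs)...); }

int add(int a, int b) { return a + b; }

int main()
{
    // Nesting the lifted lambda inside another lambda whose pack is also named "xs"
    // makes the inner pack shadow the outer one, which is confusing at best and can
    // trip shadow warnings in pedantic builds; the renamed pack avoids the collision.
    auto outer = [](auto... xs) { return LIFT_OLD(add)(xs...); };
    std::printf("%d\n", outer(20, 22)); // 42
    return 0;
}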

src/targets/gpu/kernels/include/migraphx/kernels/index.hpp

@@ -28,9 +28,60 @@
 #include <migraphx/kernels/types.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
 #include <migraphx/kernels/type_traits.hpp>
+#include <migraphx/kernels/debug.hpp>

 namespace migraphx {

+#if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
+#define MIGRAPHX_NGROUP ((MIGRAPHX_NGLOBAL + MIGRAPHX_NLOCAL - 1) / MIGRAPHX_NLOCAL)
+#endif
+
+inline __device__ __attribute__((const)) index_int compute_global_size()
+{
+#ifdef MIGRAPHX_NGLOBAL
+    return MIGRAPHX_NGLOBAL;
+#else
+    // This actualy works even when global is not divisible by local size.
+    // This doesnt actually do a multiplicatiosn. Instead it calls a device
+    // function to get the global size, which is why it works.
+    return blockDim.x * gridDim.x; // NOLINT
+#endif
+}
+
+// We cant just use blockDim.x to get the local size since its broken on hip
+// when global is not divisible by local size. In this case, we calulate the
+// size for the last group.
+inline __device__ __attribute__((const)) index_int compute_local_size()
+{
+#ifdef MIGRAPHX_NLOCAL
+    const auto nlocal = MIGRAPHX_NLOCAL;
+#else
+    const auto nlocal = blockDim.x; // NOLINT
+#endif
+#ifdef MIGRAPHX_NGROUP
+    const auto ngroup = MIGRAPHX_NGROUP;
+#else
+    const auto ngroup = gridDim.x; // NOLINT
+#endif
+    const auto group_id = blockIdx.x; // NOLINT
+    const auto nglobal  = compute_global_size();
+    if(group_id == ngroup - 1)
+    {
+        return 1 + (nglobal - 1) % nlocal;
+    }
+    else
+    {
+        return nlocal; // NOLINT
+    }
+}
+
+#ifdef MIGRAPHX_NGROUP
+// If global is divisible by local then local can be a const
+#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
+#define MIGRAPHX_HAS_CONST_LOCAL 1
+#endif
+#endif
+
 struct index
 {
     index_int global = 0;

@@ -38,20 +89,44 @@ struct index
     index_int group = 0;

 #ifdef MIGRAPHX_NGLOBAL
-    constexpr index_constant<MIGRAPHX_NGLOBAL> nglobal() const { return {}; }
+    constexpr index_constant<MIGRAPHX_NGLOBAL> nglobal() const
+    {
+        static_assert(MIGRAPHX_NGLOBAL > 0, "Global size must be greater than 0");
+        return {};
+    }
 #else
     __device__ index_int nglobal() const
     {
-        return blockDim.x * gridDim.x; // NOLINT
+        MIGRAPHX_ASSERT(compute_global_size() > 0);
+        return compute_global_size(); // NOLINT
     }
 #endif

-#ifdef MIGRAPHX_NLOCAL
-    constexpr index_constant<MIGRAPHX_NLOCAL> nlocal() const { return {}; }
+#ifdef MIGRAPHX_HAS_CONST_LOCAL
+    constexpr index_constant<MIGRAPHX_NLOCAL> nlocal() const
+    {
+        static_assert(MIGRAPHX_NLOCAL > 0, "Local size must be greater than 0");
+        return {};
+    }
 #else
     __device__ index_int nlocal() const
     {
-        return blockDim.x; // NOLINT
+#ifdef MIGRAPHX_NGROUP
+        static_assert((MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL != 0) and (MIGRAPHX_NGROUP > 1),
+                      "Local size should be const");
+#endif
+        MIGRAPHX_ASSERT(compute_local_size() > 0);
+        return compute_local_size(); // NOLINT
     }
 #endif

+#ifdef MIGRAPHX_NLOCAL
+    constexpr index_constant<MIGRAPHX_NLOCAL> max_nlocal() const { return {}; }
+#else
+    __device__ index_int max_nlocal() const
+    {
+        MIGRAPHX_ASSERT(blockDim.x > 0);
+        return blockDim.x;
+    }
+#endif
+
     template <class N, class Stride>

@@ -63,6 +138,7 @@ struct index
     template <class F, class N, class Stride>
     static constexpr void for_stride(index_int start, N n, Stride stride, F f)
     {
+        MIGRAPHX_ASSERT(start < stride);
         if constexpr(not is_integral<N>{} and not is_integral<Stride>{} and
                      max_stride_iterations(n, stride) == 1)
         {
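
A standalone check (not from the commit) of the last-group arithmetic that compute_local_size relies on, with a hypothetical global size of 1000 and a local size of 256: MIGRAPHX_NGROUP rounds up to 4 groups and the final group covers only the remaining items.

#include <cstdio>

int main()
{
    unsigned nglobal = 1000;
    unsigned nlocal  = 256;

    unsigned ngroup    = (nglobal + nlocal - 1) / nlocal; // 4, as in MIGRAPHX_NGROUP
    unsigned last_size = 1 + (nglobal - 1) % nlocal;      // 232 = 1000 - 3*256

    std::printf("ngroup=%u last_group_size=%u\n", ngroup, last_size);
    return 0;
}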

src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp

@@ -73,10 +73,10 @@ MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(!=)
 MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(&)
 MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(^)
 MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(|)
-MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(&&)
-MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(||)
-MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(!)
+MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(and)
+MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(or)
+MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(not)
 MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(~)
 MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(+)
 MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(-)