Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
e7f7ea10
Commit
e7f7ea10
authored
Nov 30, 2022
by
Alan Turner
Browse files
Merge remote-tracking branch 'origin/optimize' into ck-gsg
parents
9c6ba1ed
ae13eb93
Changes
157
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
661 additions
and
501 deletions
+661
-501
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+282
-16
src/targets/gpu/include/migraphx/gpu/elu.hpp
src/targets/gpu/include/migraphx/gpu/elu.hpp
+0
-64
src/targets/gpu/include/migraphx/gpu/mlir.hpp
src/targets/gpu/include/migraphx/gpu/mlir.hpp
+2
-1
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
+1
-1
src/targets/gpu/jit/concat.cpp
src/targets/gpu/jit/concat.cpp
+28
-11
src/targets/gpu/jit/mlir.cpp
src/targets/gpu/jit/mlir.cpp
+1
-1
src/targets/gpu/jit/pad.cpp
src/targets/gpu/jit/pad.cpp
+100
-0
src/targets/gpu/jit/pointwise.cpp
src/targets/gpu/jit/pointwise.cpp
+3
-3
src/targets/gpu/jit/scatternd.cpp
src/targets/gpu/jit/scatternd.cpp
+4
-3
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
+21
-9
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
+63
-0
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
+49
-0
src/targets/gpu/leaky_relu.cpp
src/targets/gpu/leaky_relu.cpp
+0
-65
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+14
-108
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+72
-10
src/targets/gpu/perfdb.cpp
src/targets/gpu/perfdb.cpp
+6
-5
src/targets/gpu/prefuse_ops.cpp
src/targets/gpu/prefuse_ops.cpp
+3
-1
src/targets/gpu/quant_convolution.cpp
src/targets/gpu/quant_convolution.cpp
+0
-194
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+11
-8
No files found.
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
e7f7ea10
...
@@ -25,18 +25,40 @@
...
@@ -25,18 +25,40 @@
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
/// Returns a shape suitable for the tensor descriptors built in this header.
///
/// A rank-3 input gets a unit dimension inserted at index 2, producing a
/// rank-4 shape built from the input's type and the widened lens only (the
/// input's strides are not forwarded). Any other rank is returned as a copy
/// of `input`.
inline shape reshape_if_1d(const shape& input)
{
    // Guard clause: only rank-3 shapes are rewritten.
    if(input.lens().size() != 3)
        return input;

    std::vector<size_t> widened = input.lens();
    widened.insert(widened.begin() + 2, 1);
    return shape{input.type(), widened};
}
template
<
class
Op
>
struct
miopen_convolution
struct
miopen_convolution
{
{
op
::
convolution
op
;
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
...
@@ -48,29 +70,273 @@ struct miopen_convolution
...
@@ -48,29 +70,273 @@ struct miopen_convolution
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
{
{
return
pack
(
f
(
self
.
op
.
padding
,
"padding"
),
return
pack
(
f
(
self
.
op
,
"op"
),
f
(
self
.
op
.
stride
,
"stride"
),
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
op
.
group
,
"group"
),
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
f
(
self
.
solution_object
,
"solution_object"
),
f
(
self
.
solution_object
,
"solution_object"
),
#endif
#endif
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
f
(
self
.
solution_id
,
"solution_id"
));
}
}
std
::
string
name
()
const
{
return
"gpu::convolution"
;
}
std
::
string
name
()
const
{
return
"gpu::"
+
op
.
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
inline
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
op
}.
has
(
4
);
std
::
vector
<
shape
>
conv_inputs
(
inputs
.
begin
(),
inputs
.
begin
()
+
2
);
check_shapes
{
conv_inputs
,
*
this
}.
max_ndims
(
5
).
packed_layouts
(
{{
0
,
1
,
2
},
{
0
,
1
,
2
,
3
},
{
0
,
2
,
3
,
1
},
{
0
,
1
,
2
,
3
,
4
}});
return
migraphx
::
compute_shape
<
Op
>
(
op
,
conv_inputs
);
}
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
{
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()),
int8_x4_format
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const
miopenTensorArgument_t
tensor_args
[
3
]
=
{
{
miopenTensorConvolutionX
,
nullptr
,
args
[
0
].
implicit
()},
{
miopenTensorConvolutionW
,
nullptr
,
args
[
1
].
implicit
()},
{
miopenTensorConvolutionY
,
nullptr
,
args
[
3
].
implicit
()},
};
if
(
solution_ptr
.
get
()
==
nullptr
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : Load MIOpen Solution before running it"
);
auto
status
=
miopenRunSolution
(
miopen_stream_handle
,
solution_ptr
.
get
(),
3
,
tensor_args
,
args
[
2
].
implicit
(),
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : running convolution using find_2.0 failed"
);
return
args
[
3
];
}
#else
// else use immediate mode
if
(
solution_id
==
0
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : invalid solution ID"
);
auto
status
=
miopenConvolutionForwardImmediate
(
miopen_stream_handle
,
w_desc
.
get
(),
args
[
1
].
implicit
(),
x_desc
.
get
(),
args
[
0
].
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
workspace_size
,
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": running convolution failed"
);
return
args
[
3
];
#endif
}
/// Rebuilds the convolution descriptor `cd` from the wrapped operator.
///
/// The operator's name selects the factory: "deconvolution" uses
/// make_deconv, every other name uses make_conv.
void set_conv_descriptor()
{
    if(op.name() == "deconvolution")
    {
        cd = make_deconv(op);
    }
    else
    {
        cd = make_conv(op);
    }
}
/// Compile-time hook: prepares the descriptor, runs find() and reports the
/// workspace requirement.
///
/// @param ctx    type-erased context; it is cast to the GPU context here
/// @param output output shape of the convolution
/// @param input  input shapes, forwarded unchanged to find()
/// @return a value holding a single "workspace" entry with the byte size of
///         the shape returned by find()
value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
{
    // find() reads `cd`, so the descriptor has to exist first.
    set_conv_descriptor();
    auto& gpu_ctx        = any_cast<migraphx::gpu::context>(ctx);
    const auto workspace = find(gpu_ctx, output, input);
    return {{"workspace", workspace.bytes()}};
}
/// Queries MIOpen for a way to run this convolution and records the result
/// on the operator.
///
/// With MIGRAPHX_HAS_FIND_2_API the chosen solution is kept in
/// `solution_ptr` and its serialized form in `solution_object`. Otherwise
/// the find/immediate-mode API is used and `algo` and `solution_id` are set.
///
/// @param ctx          GPU context supplying the MIOpen handle
/// @param output_shape shape of the convolution result
/// @param inputs       inputs[0] is the data shape, inputs[1] the weights
/// @return a one-dimensional int8 shape sized by the workspace requirement
///         MIOpen reported (the solution's workspace size on the Find-2.0
///         path, `perf.memory` on the other path)
/// @throws via MIGRAPHX_THROW when any MIOpen call fails or no solution is
///         returned
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    auto* miopen_stream_handle = ctx.get_stream().get_miopen();
    std::size_t workspace_size = 0;

#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        auto conv_problem = make_obj<miopen_problem>(
            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);

        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());

        auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + " : failed to get solution's workspace size");

        // Initialised so a failing call can never leave an indeterminate size behind.
        std::size_t solution_size = 0;
        status                    = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": Failed to fetch solution size");

        // Serialize the solution so finalize() can reload it later.
        auto solution_binary = std::vector<char>{};
        solution_binary.resize(solution_size);

        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": Saving solution failed");

        solution_object = value::binary{solution_binary.data(), solution_size};
        return shape{shape::int8_type, {workspace_size}};
    }
#else
    auto status = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           &workspace_size);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + " : Failed to get forward workspace size");

    const shape workspace_shape{shape::int8_type, {workspace_size}};

    auto x_shape = inputs[0];
    auto w_shape = inputs[1];
    if(int8_x4_format)
    {
        x_shape = pack_int8_shape(x_shape);
        w_shape = pack_int8_shape(w_shape);
    }

    // Scratch buffers handed to MIOpen's algorithm search.
    auto x         = to_gpu(generate_argument(x_shape));
    auto w         = to_gpu(generate_argument(w_shape));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf{};
    status = miopenFindConvolutionForwardAlgorithm(miopen_stream_handle,
                                                   x_desc.get(),
                                                   x.implicit(),
                                                   w_desc.get(),
                                                   w.implicit(),
                                                   cd.get(),
                                                   y_desc.get(),
                                                   y.implicit(),
                                                   1,
                                                   &algo_count,
                                                   &perf,
                                                   workspace.implicit(),
                                                   workspace_size,
                                                   false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + " : find convolution failed");
    algo = perf.fwd_algo;

    std::size_t solution_count = 0;
    status                     = miopenConvolutionForwardGetSolutionCount(miopen_stream_handle,
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution count failed");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(miopen_stream_handle,
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution failed");

    // solution_count now holds the number of entries MIOpen filled in;
    // front() on an empty result would be undefined behaviour.
    if(solution_count == 0 or solutions.empty())
        MIGRAPHX_THROW("MIOpen " + op.name() + ": no solutions found");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
#endif
}
/// Makes the operator runnable after compilation or deserialization.
///
/// With MIGRAPHX_HAS_FIND_2_API the serialized `solution_object` is loaded
/// into `solution_ptr` if no solution is held yet; the arguments are unused
/// on that path. Otherwise the descriptor is rebuilt, find() is re-run when
/// `solution_id` is still 0, and the chosen solution is compiled.
///
/// @param ctx          GPU context supplying the MIOpen handle
/// @param output_shape shape of the convolution result
/// @param inputs       inputs[0] data, inputs[1] weights, inputs[2] workspace
/// @throws via MIGRAPHX_THROW when loading or compiling the solution fails,
///         or when the re-run find() needs more workspace than inputs[2]
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        (void)(ctx); // avoid warnings
        (void)(output_shape);
        (void)(inputs);
        // load solution
        if(solution_ptr == nullptr)
        {
            miopenSolution_t ptr = nullptr;
            auto status          = miopenLoadSolution(&ptr,
                                             reinterpret_cast<const char*>(solution_object.data()),
                                             solution_object.size());
            // Check the status before taking ownership: on failure `ptr` is
            // not a valid solution and must not reach the owning wrapper.
            if(status != miopenStatusSuccess)
                MIGRAPHX_THROW("MIOpen " + op.name() + ": loading convolution solution failed");
            solution_ptr = miopen_solution{ptr};
        }
    }
#else
    // Use immediate mode API
    {
        set_conv_descriptor();
        if(solution_id == 0)
        {
            // Check that workspace hasn't changed
            auto size = inputs.at(2).bytes();
            auto ws   = find(ctx, output_shape, inputs);
            if(ws.bytes() > size)
                MIGRAPHX_THROW("MIOpen " + op.name() +
                               ": workspace has changed during finalization.");
        }

        auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
        auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
        auto y_desc = make_tensor(reshape_if_1d(output_shape));

        auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                              w_desc.get(),
                                                              x_desc.get(),
                                                              cd.get(),
                                                              y_desc.get(),
                                                              solution_id);
        // Report the wrapped operator's own name, as the other messages in
        // this struct do, because Op is a template parameter.
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": compile solution failed");
    }
#endif
}
inline
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
};
/// Produces the shape used for the int8 scratch tensors in find().
///
/// Shapes whose type is not int8 are returned unchanged. For int8 shapes,
/// dimension 1 is rounded up to the next multiple of four and stride 0 is
/// recomputed as stride 1 times the rounded dimension; all other lens and
/// strides are carried over as they are.
inline shape pack_int8_shape(const shape& s) const
{
    const bool is_int8 = (s.type() == shape::int8_type);
    if(not is_int8)
        return s;

    auto packed_lens    = s.lens();
    auto packed_strides = s.strides();

    // (n + 3) / 4 * 4 rounds n up to a multiple of four.
    const auto padded_dim = (packed_lens[1] + 3) / 4 * 4;
    packed_lens[1]        = padded_dim;
    packed_strides[0]     = packed_strides[1] * padded_dim;

    return {s.type(), packed_lens, packed_strides};
}
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/elu.hpp
deleted
100644 → 0
View file @
9c6ba1ed
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_ELU_HPP
#include <migraphx/op/elu.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
/// GPU operator wrapping `op::elu` together with an activation descriptor.
///
/// Only the wrapped `op` is exposed through reflection; `ad` is not.
/// compute_shape, compute and finalize are declared here and defined
/// elsewhere.
struct miopen_elu
{
    op::elu op;
    shared<activation_descriptor> ad;

    /// Forwards reflection to the wrapped operator's fields.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    /// Operator name of this wrapper.
    std::string name() const
    {
        return "gpu::elu";
    }

    shape compute_shape(const std::vector<shape>& inputs) const;

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    void finalize(context&, const shape&, const std::vector<shape>&);

    /// Returns the index of the last input, i.e. `shapes.size() - 1`.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last_index = shapes.size() - 1;
        return last_index;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/mlir.hpp
View file @
e7f7ea10
...
@@ -36,7 +36,8 @@ struct module;
...
@@ -36,7 +36,8 @@ struct module;
namespace
gpu
{
namespace
gpu
{
std
::
string
dump_mlir
(
const
module
&
m
);
std
::
string
dump_mlir
(
const
module
&
m
);
code_object_op
compile_mlir
(
const
context
&
ctx
,
const
module
&
m
);
code_object_op
compile_mlir
(
const
context
&
ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
);
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
ins
,
instruction_ref
ins
,
...
...
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
View file @
e7f7ea10
...
@@ -41,7 +41,7 @@ struct problem_params
...
@@ -41,7 +41,7 @@ struct problem_params
shape
output
;
shape
output
;
};
};
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
);
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
,
bool
xdlops
);
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
View file @
e7f7ea10
...
@@ -25,7 +25,7 @@
...
@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <rocblas.h>
#include <rocblas
/rocblas
.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/jit/concat.cpp
View file @
e7f7ea10
...
@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT
...
@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT
static
const
char
*
const
concat_kernel
=
R"__migraphx__(
static
const
char
*
const
concat_kernel
=
R"__migraphx__(
#include <migraphx/kernels/concat.hpp>
#include <migraphx/kernels/concat.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
#include <args.hpp>
namespace migraphx {
namespace migraphx {
${preamble}
extern "C" {
extern "C" {
__global__ void ${kernel}(${params})
__global__ void ${kernel}(${params})
{
{
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y,
${concat_params},
auto... xs) {
concat<${axis}>(y, xs...);
concat<${axis}>(
${concat_args})(${post},
y, xs...);
});
});
}
}
...
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
...
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
{
// TODO: Use reduce_dims
auto
num_of_concat_inputs
=
v
.
get
(
"concat_inputs"
,
inputs
.
size
()
-
1
);
hip_compile_options
options
;
hip_compile_options
options
;
options
.
inputs
=
inputs
;
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
output
=
inputs
.
back
();
options
.
params
=
"-Wno-float-equal"
;
options
.
params
=
"-Wno-float-equal"
;
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
options
.
set_launch_params
(
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
auto
src
=
interpolate_string
(
concat_kernel
,
auto
src
=
interpolate_string
(
{{
"kernel"
,
options
.
kernel_name
},
concat_kernel
,
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{{
"kernel"
,
options
.
kernel_name
},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"axis"
,
v
.
at
(
"axis"
).
to
<
std
::
string
>
()}});
{
"concat_params"
,
enum_params
(
num_of_concat_inputs
,
"auto concat_x"
)},
{
"concat_args"
,
enum_params
(
num_of_concat_inputs
,
"concat_x"
)},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"axis"
,
v
.
at
(
"axis"
).
to
<
std
::
string
>
()}});
return
compile_hip_code_object
(
src
,
options
);
return
compile_hip_code_object
(
src
,
options
);
}
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
auto
v
=
op
.
to_value
();
if
(
not
ins
->
module_inputs
().
empty
())
{
auto
*
pm
=
ins
->
module_inputs
().
front
();
v
[
"concat_inputs"
]
=
ins
->
inputs
().
size
()
-
pm
->
get_parameter_names
().
size
();
v
[
"preamble"
]
=
generate_pointwise
(
*
pm
,
"post_concat"
);
v
[
"post"
]
=
"MIGRAPHX_LIFT(post_concat)"
;
v
[
"kernel"
]
=
"concat_"
+
generate_name_from_ops
(
*
pm
)
+
"_kernel"
;
}
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
}
}
};
};
...
...
src/targets/gpu/jit/mlir.cpp
View file @
e7f7ea10
...
@@ -41,7 +41,7 @@ struct mlir_compiler : compiler<mlir_compiler>
...
@@ -41,7 +41,7 @@ struct mlir_compiler : compiler<mlir_compiler>
{
{
auto
*
smod
=
ins
->
module_inputs
().
front
();
auto
*
smod
=
ins
->
module_inputs
().
front
();
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
return
insert
(
compile_mlir
(
ctx
,
*
smod
));
return
insert
(
compile_mlir
(
ctx
,
*
smod
,
ins
->
inputs
()
));
}
}
compiler_replace
insert
(
code_object_op
co
)
const
compiler_replace
insert
(
code_object_op
co
)
const
...
...
src/targets/gpu/
batch_norm_inference
.cpp
→
src/targets/gpu/
jit/pad
.cpp
View file @
e7f7ea10
...
@@ -21,65 +21,80 @@
...
@@ -21,65 +21,80 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/gpu/
batch_norm_inference
.hpp>
#include <migraphx/gpu/
compiler
.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/float_equal.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
shape
miopen_batch_norm_inference
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
using
namespace
migraphx
::
gpu
::
gen
;
// NOLINT
static
const
char
*
const
pointwise_kernel
=
R"__migraphx__(
#include <migraphx/kernels/pad.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p)
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
6
);
auto offsets = index_ints<${offsets}>{};
check_shapes
{
inputs
.
data
(),
inputs
.
data
()
+
1
,
*
this
}.
same_ndims
().
max_ndims
(
5
);
auto idx = make_index();
return
op
.
compute_shape
({
inputs
.
at
(
0
),
inputs
.
at
(
1
),
inputs
.
at
(
2
),
inputs
.
at
(
3
),
inputs
.
at
(
4
)});
make_tensors()(input_p, output_p)([&](auto input, auto output) {
pad(idx, offsets, input, output, ${pad_val});
});
}
}
}
inline
shape
reshape_to_2d
(
const
shape
&
input
)
} // namespace migraphx
{
auto
dims
=
input
.
lens
();
if
(
dims
.
size
()
>=
4
)
return
input
;
std
::
vector
<
size_t
>
new_dims
(
dims
.
begin
(),
dims
.
end
());
)__migraphx__"
;
std
::
size_t
num
=
4
-
dims
.
size
();
new_dims
.
insert
(
new_dims
.
end
(),
num
,
1
);
return
{
input
.
type
(),
new_dims
};
}
argument
miopen_batch_norm_inference
::
compute
(
context
&
ctx
,
struct
pad_compiler
:
compiler
<
pad_compiler
>
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
{
shape
x_shape
=
args
[
0
].
get_shape
();
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pad"
};
}
shape
y_shape
=
output_shape
;
shape
bn_shape
=
args
[
3
].
get_shape
();
auto
x_desc
=
make_tensor
(
reshape_to_2d
(
x_shape
));
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
auto
y_desc
=
make_tensor
(
reshape_to_2d
(
y_shape
));
{
auto
bn_desc
=
make_tensor
(
reshape_to_2d
(
bn_shape
));
hip_compile_options
options
;
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
virtual_inputs
=
reduce_dims
(
inputs
);
options
.
kernel_name
=
"pad_kernel"
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
inputs
.
at
(
1
).
elements
()));
float
alpha
=
1.0
;
auto
pad_val
=
v
.
get
(
"value"
,
0.
f
);
float
beta
=
0.0
f
;
auto
pad_val_string
=
to_string
(
pad_val
);
if
(
float_equal
(
pad_val
,
std
::
numeric_limits
<
float
>::
lowest
()))
pad_val_string
=
"lowest{}"
;
if
(
float_equal
(
pad_val
,
std
::
numeric_limits
<
float
>::
max
()))
pad_val_string
=
"highest{}"
;
miopenBatchNormalizationForwardInference
(
ctx
.
get_stream
().
get_miopen
(),
auto
padding
=
v
.
at
(
"pads"
).
to_vector
<
int64_t
>
();
miopenBatchNormMode_t
(
op
.
bn_mode
),
auto
input_lens
=
inputs
.
front
().
lens
();
&
alpha
,
std
::
vector
<
size_t
>
offsets
(
input_lens
.
size
());
&
beta
,
std
::
copy
(
padding
.
begin
(),
padding
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
x_desc
.
get
(),
args
[
0
].
implicit
(),
y_desc
.
get
(),
args
[
5
].
implicit
(),
bn_desc
.
get
(),
args
[
1
].
implicit
(),
args
[
2
].
implicit
(),
args
[
3
].
implicit
(),
args
[
4
].
implicit
(),
op
.
epsilon
);
return
args
[
5
];
auto
src
=
interpolate_string
(
}
pointwise_kernel
,
{{
"pad_val"
,
to_string
(
pad_val_string
)},
{
"offsets"
,
to_string_range
(
offsets
)}});
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
}
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/gpu/jit/pointwise.cpp
View file @
e7f7ea10
...
@@ -58,7 +58,7 @@ __global__ void ${kernel}(${params})
...
@@ -58,7 +58,7 @@ __global__ void ${kernel}(${params})
struct
pointwise_compiler
:
compiler
<
pointwise_compiler
>
struct
pointwise_compiler
:
compiler
<
pointwise_compiler
>
{
{
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pointwise"
,
"contiguous"
};
}
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pointwise"
,
"contiguous"
,
"layout"
};
}
static
std
::
size_t
oversubscribe_if
(
bool
b
)
static
std
::
size_t
oversubscribe_if
(
bool
b
)
{
{
...
@@ -91,12 +91,12 @@ struct pointwise_compiler : compiler<pointwise_compiler>
...
@@ -91,12 +91,12 @@ struct pointwise_compiler : compiler<pointwise_compiler>
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
{
if
(
op
.
name
()
==
"contiguous"
)
if
(
contains
({
"layout"
,
"contiguous"
},
op
.
name
())
)
{
{
return
replace
(
compile_op
(
return
replace
(
compile_op
(
ctx
,
ctx
,
to_shapes
(
ins
->
inputs
()),
to_shapes
(
ins
->
inputs
()),
{{
"lambda"
,
"[](auto x) { return x; }"
},
{
"kernel"
,
"contiguous
_kernel"
}}));
{{
"lambda"
,
"[](auto x) { return x; }"
},
{
"kernel"
,
op
.
name
()
+
"
_kernel"
}}));
}
}
else
else
{
{
...
...
src/targets/gpu/jit/scatternd.cpp
View file @
e7f7ea10
...
@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
...
@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
{
{
assert
(
starts_with
(
op
.
name
(),
"scatternd_"
));
assert
(
starts_with
(
op
.
name
(),
"scatternd_"
));
auto
reduction
=
op
.
name
().
substr
(
10
);
auto
reduction
=
op
.
name
().
substr
(
10
);
return
insert
(
compile_op
(
ctx
,
return
insert
(
compile_op
(
to_shapes
({
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
()}),
ctx
,
{{
"reduction"
,
reduction
}}));
to_shapes
(
std
::
vector
<
instruction_ref
>
{
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
()}),
{{
"reduction"
,
reduction
}}));
}
}
compiler_replace
insert
(
const
operation
&
op
)
const
compiler_replace
insert
(
const
operation
&
op
)
const
...
...
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
View file @
e7f7ea10
...
@@ -41,7 +41,15 @@ constexpr auto concat_slice(Output out, Input, Start)
...
@@ -41,7 +41,15 @@ constexpr auto concat_slice(Output out, Input, Start)
return
Start
{}
*
output_shape
.
strides
[
Axis
];
return
Start
{}
*
output_shape
.
strides
[
Axis
];
});
});
constexpr
auto
s
=
make_shape
(
lens
,
strides
);
constexpr
auto
s
=
make_shape
(
lens
,
strides
);
return
make_tensor_view
(
&
out
[
offset
],
s
);
MIGRAPHX_ASSERT
(
offset
<
out
.
get_shape
().
element_space
());
MIGRAPHX_ASSERT
((
s
.
element_space
()
+
offset
)
<=
out
.
get_shape
().
element_space
());
return
make_tensor_view
(
out
.
data
()
+
offset
,
s
);
}
// Builds a callable that applies concat_slice<Axis> to every tensor in `xs`
// (each with the same `input` and `start`) and passes all resulting slices,
// in order, to the function it is given.
template <index_int Axis, class Input, class Start, class... Ts>
constexpr auto concat_slices(Input input, Start start, Ts... xs)
{
    return [=](auto consumer) {
        // One slice per tensor in the pack, expanded in place as arguments.
        consumer(concat_slice<Axis>(xs, input, start)...);
    };
}
}
template
<
index_int
Axis
,
class
Input
>
template
<
index_int
Axis
,
class
Input
>
...
@@ -51,15 +59,19 @@ constexpr auto concat_ends(Input)
...
@@ -51,15 +59,19 @@ constexpr auto concat_ends(Input)
return
_c
<
lens
[
Axis
]
>
;
return
_c
<
lens
[
Axis
]
>
;
}
}
template
<
index_int
Axis
,
class
Output
,
class
...
Inputs
>
template
<
index_int
Axis
,
class
...
Inputs
>
__device__
void
concat
(
Output
output
,
Inputs
...
inputs
)
__device__
auto
concat
(
Inputs
...
inputs
)
{
{
auto
idx
=
make_index
();
return
[
=
](
auto
f
,
auto
...
ts
)
{
fold
([
&
](
auto
start
,
auto
input
)
{
auto
idx
=
make_index
();
auto
y
=
concat_slice
<
Axis
>
(
output
,
input
,
start
);
fold
([
&
](
auto
start
,
auto
input
)
{
idx
.
global_stride
(
input
.
get_shape
().
elements
(),
[
&
](
auto
i
)
{
y
[
i
]
=
input
[
i
];
});
concat_slices
<
Axis
>
(
input
,
start
,
ts
...)([
&
](
auto
y
,
auto
...
xs
)
{
return
start
+
concat_ends
<
Axis
>
(
input
);
idx
.
global_stride
(
input
.
get_shape
().
elements
(),
})(
_c
<
0
>
,
inputs
...);
[
&
](
auto
i
)
{
y
[
i
]
=
f
(
input
[
i
],
xs
[
i
]...);
});
});
return
start
+
concat_ends
<
Axis
>
(
input
);
})(
_c
<
0
>
,
inputs
...);
};
}
}
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/
gpu/quant_convolution
.hpp
→
src/targets/gpu/
kernels/
include/migraphx/
kernels/pad
.hpp
View file @
e7f7ea10
...
@@ -21,53 +21,43 @@
...
@@ -21,53 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_QUANT_CONVOLUTION
_HPP
#ifndef MIGRAPHX_GUARD_
KERNELS_PAD
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_QUANT_CONVOLUTION
_HPP
#define MIGRAPHX_GUARD_
KERNELS_PAD
_HPP
#include <migraphx/shape.hpp>
#include <migraphx/
kernels/
shape.hpp>
#include <migraphx/
reflect
.hpp>
#include <migraphx/
kernels/index
.hpp>
#include <migraphx/
op/quant_convolution
.hpp>
#include <migraphx/
kernels/algorithm
.hpp>
#include <migraphx/
gpu/miopen
.hpp>
#include <migraphx/
kernels/ranges
.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
template
<
class
Offsets
,
class
Input
,
class
Output
,
class
PadVal
>
__device__
void
pad
(
const
index
&
idx
,
struct
miopen_quant_convolution
const
Offsets
&
offsets
,
const
Input
&
input
,
Output
&
output
,
const
PadVal
&
pad_val
)
{
{
op
::
quant_convolution
op
;
auto
output_shape
=
output
.
get_shape
();
bool
int8_x4_format
=
false
;
idx
.
global_stride
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
shared
<
convolution_descriptor
>
cd
;
// 1. get current multi-index for output
miopenConvFwdAlgorithm_t
algo
{};
// 2. get the size of the input to determine input boundaries
uint64_t
solution_id
=
0
;
// 3. compute the corresponding multi-index for input by accounting for offsets
// 4. if current multi-index is within offsets or input's new multi-index is out of bounds,
template
<
class
Self
,
class
F
>
// use pad value instead of input's value
static
auto
reflect
(
Self
&
self
,
F
f
)
auto
multi
=
output_shape
.
multi
(
i
);
{
auto
input_bounds
=
input
.
get_shape
().
lens
;
// TODO: Add algo
auto
input_idx
=
multi
-
offsets
;
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
auto
range_multi
=
range
(
multi
.
size
());
pack
(
f
(
self
.
int8_x4_format
,
"int8_x4_format"
)));
}
if
(
any_of
(
range_multi
.
begin
(),
range_multi
.
end
(),
[
&
](
auto
j
)
{
return
multi
[
j
]
<
offsets
[
j
]
or
input_idx
[
j
]
>=
input_bounds
[
j
];
std
::
string
name
()
const
{
return
"gpu::quant_convolution"
;
}
}))
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
output
[
multi
]
=
pad_val
;
argument
else
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
output
[
multi
]
=
input
[
input_idx
];
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
});
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
private:
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
#endif
#endif
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
0 → 100644
View file @
e7f7ea10
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#define MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#include <migraphx/kernels/iota_iterator.hpp>
namespace
migraphx
{
/// Pair of iterators exposed through begin()/end() so the pair can be handed
/// to iterator-based algorithms as one object.
/// Both iterators are stored by value and returned by value.
template <class Iterator>
struct iterator_range
{
    Iterator start;
    Iterator last;

    /// Returns a copy of the iterator stored in `start`.
    constexpr auto begin() const -> Iterator { return this->start; }

    /// Returns a copy of the iterator stored in `last`.
    constexpr auto end() const -> Iterator { return this->last; }
};
/// Builds an iterator_range whose begin() iterator is initialised from `start`
/// and whose end() iterator is initialised from `last`.
constexpr iterator_range<iota_iterator> range(diff_int start, diff_int last)
{
    using result_type = iterator_range<iota_iterator>;
    // Each iota_iterator is list-initialised from its index; the second
    // initialiser is left as `{}` (default value of whatever iota_iterator
    // stores next to the index).
    return result_type{{start, {}}, {last, {}}};
}
/// Convenience overload: same as range(0, last).
constexpr iterator_range<iota_iterator> range(diff_int last)
{
    const diff_int first = 0;
    return range(first, last);
}
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP
src/targets/gpu/leaky_relu.cpp
deleted
100644 → 0
View file @
9c6ba1ed
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/errors.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates the input shapes and reports the output shape.
// Exactly two shapes are required and none may be broadcasted; the result is
// the second shape (presumably the preallocated output buffer - confirm
// against the lowering pass).
shape miopen_leaky_relu::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes checker{inputs, *this};
    checker.has(2).not_broadcasted();
    return inputs.at(1);
}
// Runs the MIOpen activation described by `ad` on args[0] and writes the
// result into args[1], which is also returned.
//
// ctx          - supplies the MIOpen handle of the current stream.
// output_shape - shape used to build the output tensor descriptor.
// args         - args[0] is the input, args[1] is the output buffer.
//
// Throws a migraphx exception when MIOpen reports a failure, instead of
// silently returning an output buffer that was never written.
argument miopen_leaky_relu::compute(context& ctx,
                                    const shape& output_shape,
                                    const std::vector<argument>& args) const
{
    // y = alpha * activation(x) + beta * y; beta = 0 means the previous
    // contents of the output are not blended in.
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    auto status = miopenActivationForward(ctx.get_stream().get_miopen(),
                                          ad.get(),
                                          &alpha,
                                          x_desc.get(),
                                          args[0].implicit(),
                                          &beta,
                                          y_desc.get(),
                                          args[1].implicit());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Leaky ReLU: activation forward failed");

    return args[1];
}
// Creates the activation descriptor from the operator's alpha and stores it
// in `ad`. The context and shape arguments are not used.
void miopen_leaky_relu::finalize(context&, const shape&, const std::vector<shape>&)
{
    this->ad = make_leaky_relu(this->op.alpha);
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/lowering.cpp
View file @
e7f7ea10
...
@@ -29,23 +29,15 @@
...
@@ -29,23 +29,15 @@
#include <migraphx/instruction_ref.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
...
@@ -98,14 +90,11 @@ struct miopen_apply
...
@@ -98,14 +90,11 @@ struct miopen_apply
add_extend_op
(
"argmax"
);
add_extend_op
(
"argmax"
);
add_extend_op
(
"argmin"
);
add_extend_op
(
"argmin"
);
add_extend_op
(
"elu"
);
add_extend_op
(
"gather"
);
add_extend_op
(
"gather"
);
add_extend_op
(
"leaky_relu"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"multinomial"
);
add_extend_op
(
"multinomial"
);
add_extend_op
(
"nonzero"
);
add_extend_op
(
"nonzero"
);
add_extend_op
(
"pad"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"prefix_scan_sum"
);
add_extend_op
(
"prefix_scan_sum"
);
add_extend_op
(
"reverse"
);
add_extend_op
(
"reverse"
);
...
@@ -115,16 +104,15 @@ struct miopen_apply
...
@@ -115,16 +104,15 @@ struct miopen_apply
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"topk"
);
add_extend_op
(
"topk"
);
add_
batch_norm_inference_op
(
);
add_
convolution_op
(
"convolution"
);
add_convolution_op
();
add_convolution_op
(
"deconvolution"
);
add_
de
convolution_op
();
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_if_op
();
add_if_op
();
add_loop_op
();
add_loop_op
();
add_neg_op
();
add_neg_op
();
add_nms_op
();
add_nms_op
();
add_quant_convolution_op
();
}
}
void
copy_params
()
const
void
copy_params
()
const
...
@@ -232,38 +220,6 @@ struct miopen_apply
...
@@ -232,38 +220,6 @@ struct miopen_apply
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
}
void
add_convolution_op
()
{
apply_map
.
emplace
(
"convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
void
add_deconvolution_op
()
{
apply_map
.
emplace
(
"deconvolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
deconvolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_deconvolution
{
op
,
make_deconv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
template
<
typename
Op
>
template
<
typename
Op
>
void
add_gemm_op
(
const
std
::
string
&
name
)
void
add_gemm_op
(
const
std
::
string
&
name
)
{
{
...
@@ -277,32 +233,19 @@ struct miopen_apply
...
@@ -277,32 +233,19 @@ struct miopen_apply
});
});
}
}
void
add_
quant_
convolution_op
()
void
add_convolution_op
(
const
std
::
string
&
name
)
{
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
operation
conv
=
make_op
(
shape
ws
;
"gpu::"
+
name
,
miopen_quant_convolution
conv
;
{{
"op"
,
ins
->
get_operator
().
to_value
()},
{
"int8_x4_format"
,
int8_x4_format
}});
auto
compile_quant_conv_with_format
=
[
&
](
bool
format
)
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
conv
=
miopen_quant_convolution
{
op
,
format
,
make_conv
(
op
)};
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
};
try
{
compile_quant_conv_with_format
(
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
compile_quant_conv_with_format
(
not
int8_x4_format
);
}
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
return
mod
->
replace_instruction
(
ins
,
make_op
(
"gpu::miopen_op"
,
{{
"op"
,
to_value
(
conv
)}}),
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
output
);
});
});
}
}
...
@@ -336,43 +279,6 @@ struct miopen_apply
...
@@ -336,43 +279,6 @@ struct miopen_apply
});
});
}
}
void
add_batch_norm_inference_op
()
{
apply_map
.
emplace
(
"batch_norm_inference"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
batch_norm_inference
>
(
ins
->
get_operator
());
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
shape
old_shape
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
auto
input
=
ins
->
inputs
()[
0
];
auto
input_lens
=
input
->
get_shape
().
lens
();
std
::
vector
<
int64_t
>
rsp_lens
(
input_lens
.
size
(),
1
);
// for per_activation case, also need to reshape input
if
(
op
.
bn_mode
==
op
::
batch_norm_inference
::
per_activation
)
{
std
::
copy
(
input_lens
.
begin
()
+
1
,
input_lens
.
end
(),
rsp_lens
.
begin
()
+
1
);
}
else
{
rsp_lens
[
1
]
=
static_cast
<
int64_t
>
(
old_shape
.
elements
());
}
auto
reshape_op
=
op
::
reshape
{
rsp_lens
};
std
::
vector
<
instruction_ref
>
reshapes
;
std
::
transform
(
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
(),
std
::
back_inserter
(
reshapes
),
[
&
](
auto
i
)
{
return
mod
->
insert_instruction
(
ins
,
reshape_op
,
i
);
});
return
mod
->
replace_instruction
(
ins
,
miopen_batch_norm_inference
{
op
},
input
,
reshapes
[
0
],
reshapes
[
1
],
reshapes
[
2
],
reshapes
[
3
],
output
);
});
}
// use 0 - input to represent neg
// use 0 - input to represent neg
void
add_neg_op
()
void
add_neg_op
()
{
{
...
...
src/targets/gpu/mlir.cpp
View file @
e7f7ea10
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include "migraphx/make_op.hpp"
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#ifdef MIGRAPHX_MLIR
#ifdef MIGRAPHX_MLIR
...
@@ -43,8 +44,9 @@
...
@@ -43,8 +44,9 @@
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/permutation.hpp>
#include <deque>
#include <deque>
#include <variant>
#include <variant>
...
@@ -99,7 +101,10 @@ struct mlir_handle
...
@@ -99,7 +101,10 @@ struct mlir_handle
mlir_handle
(
T
p
)
:
handle
(
ptr
{
p
})
{}
mlir_handle
(
T
p
)
:
handle
(
ptr
{
p
})
{}
T
get
()
const
{
return
handle
.
get
().
get
();
}
T
get
()
const
{
return
handle
.
get
().
get
();
// NOLINT(readability-redundant-smartptr-get)
}
T
release
()
{
return
handle
.
release
().
get
();
}
T
release
()
{
return
handle
.
release
().
get
();
}
...
@@ -370,7 +375,11 @@ struct mlir_program
...
@@ -370,7 +375,11 @@ struct mlir_program
mlir_operation_state
&
add_results
(
const
std
::
vector
<
shape
>&
outputs
)
mlir_operation_state
&
add_results
(
const
std
::
vector
<
shape
>&
outputs
)
{
{
auto
x
=
prog
->
make_tensors
(
outputs
);
std
::
vector
<
shape
>
reshaped
(
outputs
.
size
());
std
::
transform
(
outputs
.
begin
(),
outputs
.
end
(),
reshaped
.
begin
(),
[](
const
shape
&
r
)
{
return
shape
{
r
.
type
(),
r
.
lens
()};
});
auto
x
=
prog
->
make_tensors
(
reshaped
);
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
return
*
this
;
return
*
this
;
}
}
...
@@ -502,11 +511,12 @@ struct mlir_program
...
@@ -502,11 +511,12 @@ struct mlir_program
{
{
pp
=
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
std
::
string
tuned
=
get_tune_params
();
// check if HW supports xdlops
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_name
);
std
::
string
tuned
=
get_tune_params
(
xdlops
);
if
(
not
tuned
.
empty
())
if
(
not
tuned
.
empty
())
ops
.
add_attributes
({{
"perf_config"
,
tuned
}});
ops
.
add_attributes
({{
"perf_config"
,
tuned
}});
// check if HW supports xdlops
if
(
xdlops
)
if
(
contains
(
get_xdlops_archs
(),
target_name
))
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
}
...
@@ -571,7 +581,7 @@ struct mlir_program
...
@@ -571,7 +581,7 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
}
std
::
string
get_tune_params
()
{
return
get_mlir_perf_for_conv
(
pp
);
}
std
::
string
get_tune_params
(
bool
xdlops
)
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
mlir_context
ctx
;
mlir_context
ctx
;
MlirLocation
location
;
MlirLocation
location
;
...
@@ -589,8 +599,54 @@ std::string dump_mlir(const module& m)
...
@@ -589,8 +599,54 @@ std::string dump_mlir(const module& m)
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
}
}
code_object_op
compile_mlir
(
const
context
&
,
const
module
&
m
)
void
adjust_param_shapes
(
module
&
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
{
{
auto
names
=
m
.
get_parameter_names
();
std
::
sort
(
names
.
begin
(),
names
.
end
());
for
(
auto
i
:
range
(
names
.
size
()))
{
const
auto
&
name
=
names
[
i
];
const
auto
&
input
=
inputs
[
i
]
->
get_shape
();
auto
param
=
m
.
get_parameter
(
name
);
if
(
input
.
standard
())
continue
;
auto
lens
=
input
.
lens
();
auto
strides
=
input
.
strides
();
std
::
vector
<
operation
>
ops
;
if
(
input
.
transposed
())
{
auto
perm
=
find_permutation
(
input
);
auto
iperm
=
invert_permutation
(
perm
);
lens
=
reorder_dims
(
lens
,
iperm
);
strides
=
reorder_dims
(
strides
,
iperm
);
ops
.
push_back
(
make_op
(
"transpose"
,
{{
"permutation"
,
perm
}}));
}
if
(
input
.
broadcasted
())
{
std
::
transform
(
lens
.
begin
(),
lens
.
end
(),
strides
.
begin
(),
lens
.
begin
(),
[](
auto
len
,
auto
stride
)
->
std
::
size_t
{
if
(
stride
==
0
)
return
1
;
return
len
;
});
ops
.
push_back
(
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
input
.
lens
()}}));
}
auto
new_param
=
std
::
accumulate
(
ops
.
begin
(),
ops
.
end
(),
m
.
add_parameter
(
name
+
".0"
,
shape
{
input
.
type
(),
lens
}),
[
&
](
auto
x
,
auto
op
)
{
return
m
.
insert_instruction
(
param
,
op
,
x
);
});
m
.
replace_instruction
(
param
,
new_param
);
m
.
remove_instruction
(
param
);
}
}
code_object_op
compile_mlir
(
const
context
&
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
{
adjust_param_shapes
(
m
,
inputs
);
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
if
(
trace
)
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
std
::
cout
<<
m
<<
std
::
endl
;
...
@@ -662,13 +718,19 @@ instruction_ref insert_mlir(module& m,
...
@@ -662,13 +718,19 @@ instruction_ref insert_mlir(module& m,
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
code_object_op
compile_mlir
(
const
context
&
,
const
module
&
)
{
return
{};
}
template
<
class
T
>
template
<
class
T
>
void
use
(
T
&
)
void
use
(
T
&
)
{
{
}
}
// Disabling clang-tidy warning on non-real usage.
// NOLINTBEGIN(performance-unnecessary-value-param)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
instruction_ref
instruction_ref
// cppcheck-suppress funcArgNamesDifferent
// cppcheck-suppress funcArgNamesDifferent
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
...
...
src/targets/gpu/perfdb.cpp
View file @
e7f7ea10
...
@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query)
...
@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query)
}
// namespace
}
// namespace
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
)
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
,
bool
xdlops
)
{
{
std
::
string
query
=
"select P.* \
std
::
string
solver
=
xdlops
?
"ConvMlirIgemmFwdXdlops"
:
"ConvMlirIgemmFwd"
;
std
::
string
query
=
"select P.* \
from perf_db P, config C \
from perf_db P, config C \
where P.config = C.id AND \
where P.config = C.id AND \
P.solver = '
ConvMlirIgemmFwdXdlops
' AND \
P.solver = '
${solver}
' AND \
${config}"
;
${config}"
;
auto
results
=
auto
results
=
query_miopen_db
(
query_miopen_db
(
interpolate_string
(
query
,
{{
"config"
,
generate_miopen_config
(
pp
)}}));
interpolate_string
(
query
,
{{
"config"
,
generate_miopen_config
(
pp
)}
,
{
"solver"
,
solver
}
}));
if
(
results
.
empty
())
if
(
results
.
empty
())
return
""
;
return
""
;
return
results
.
front
().
at
(
"params"
);
return
results
.
front
().
at
(
"params"
);
...
...
src/targets/gpu/prefuse_ops.cpp
View file @
e7f7ea10
...
@@ -104,7 +104,9 @@ struct find_add_layernorm
...
@@ -104,7 +104,9 @@ struct find_add_layernorm
{
{
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
float
eps
=
0
;
if
(
contains
(
r
.
instructions
,
"eps"
))
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
m
.
replace_instruction
(
ins
,
add_layernorm
{
eps
},
add_ins
->
inputs
());
m
.
replace_instruction
(
ins
,
add_layernorm
{
eps
},
add_ins
->
inputs
());
}
}
...
...
src/targets/gpu/quant_convolution.cpp
deleted
100644 → 0
View file @
9c6ba1ed
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates the four input shapes (all must be standard) and derives the
// output shape from the first two of them via the wrapped quant_convolution
// operator.
shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes checker{inputs, *this};
    checker.has(4).standard();

    const auto& x_shape = inputs.at(0);
    const auto& w_shape = inputs.at(1);
    return op.normalize_compute_shape({x_shape, w_shape});
}
// Executes the forward convolution through MIOpen using the algorithm stored
// in `algo`.
//
// args[0] - input tensor, args[1] - weights, args[2] - workspace buffer,
// args[3] - output buffer (returned).
//
// Throws when MIOpen does not report success.
argument miopen_quant_convolution::compute(context& ctx,
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
{
    const auto& x         = args[0];
    const auto& w         = args[1];
    const auto& workspace = args[2];
    const auto& y         = args[3];

    auto x_desc = make_tensor(x.get_shape(), int8_x4_format);
    auto w_desc = make_tensor(w.get_shape(), int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    // Scaling factors handed to MIOpen: alpha = 1, beta = 0.
    float alpha = 1;
    float beta  = 0;

    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                           &alpha,
                                           x_desc.get(),
                                           x.implicit(),
                                           w_desc.get(),
                                           w.implicit(),
                                           cd.get(),
                                           algo,
                                           &beta,
                                           y_desc.get(),
                                           y.implicit(),
                                           workspace.implicit(),
                                           workspace.get_shape().bytes());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
    }

    return y;
}
// Searches MIOpen for a forward convolution algorithm and solution for the
// given shapes, storing the results in `algo` and `solution_id`.
//
// ctx          - supplies the MIOpen handle of the current stream.
// output_shape - shape of the convolution result.
// inputs       - inputs[0] is the input shape, inputs[1] the weights shape.
//
// Returns an int8 shape whose single dimension is the workspace size (in
// bytes) reported for the selected algorithm.
//
// Throws when any MIOpen query fails or when MIOpen reports no solution at
// all for this problem.
shape miopen_quant_convolution::find(context& ctx,
                                     const shape& output_shape,
                                     std::vector<shape> inputs)
{
    shape workspace_shape{};
    auto x_desc = make_tensor(inputs[0], int8_x4_format);
    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    // When the packed int8x4 layout is requested, the buffers used for the
    // search must have the padded shapes.
    auto x_shape = inputs[0];
    auto w_shape = inputs[1];
    if(int8_x4_format)
    {
        x_shape = pack_int8_shape(x_shape);
        w_shape = pack_int8_shape(w_shape);
    }

    // Temporary device buffers used only for the algorithm search.
    auto x         = to_gpu(generate_argument(x_shape));
    auto w         = to_gpu(generate_argument(w_shape));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
                                                        x.implicit(),
                                                        w_desc.get(),
                                                        w.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: find convolution failed");
    algo = perf.fwd_algo;

    // Initialised so a failing query can never leave an indeterminate value.
    size_t solution_count = 0;

    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: get solution count failed");

    // With zero solutions the vector below would be empty and front() would
    // be undefined behaviour; report the condition as an exception instead.
    if(solution_count == 0)
        MIGRAPHX_THROW("MIOpen Quant Convolution: no solutions found");

    std::vector<miopenConvSolution_t> solutions(solution_count);

    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: get solution failed");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
}
// Prepares the operator for execution: makes sure a convolution descriptor
// and a solution id exist, then asks MIOpen to compile that solution.
//
// inputs[0] / inputs[1] are the input and weights shapes; inputs.at(2) is the
// workspace shape that was allocated for this instruction.
//
// Throws when a fresh search needs more workspace than was allocated, or when
// MIOpen fails to compile the solution.
void miopen_quant_convolution::finalize(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    if(cd == nullptr)
        cd = make_conv(op);

    const bool needs_search = (solution_id == 0);
    if(needs_search)
    {
        // Check that workspace hasn't changed
        const auto available = inputs.at(2).bytes();
        const auto required  = find(ctx, output_shape, inputs).bytes();
        if(required > available)
            MIGRAPHX_THROW("MIOpen Quant Convolution: workspace has changed during finalization.");
    }

    auto x_desc = make_tensor(inputs[0], int8_x4_format);
    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                          w_desc.get(),
                                                          x_desc.get(),
                                                          cd.get(),
                                                          y_desc.get(),
                                                          solution_id);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: compile solution failed");
}
// Returns a copy of `s` whose dimension 1 is rounded up to the next multiple
// of 4, with stride 0 recomputed as stride[1] * lens[1].
// Only int8 shapes are accepted; any other type throws.
shape miopen_quant_convolution::pack_int8_shape(const shape& s) const
{
    const bool is_int8 = (s.type() == shape::int8_type);
    if(not is_int8)
    {
        MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
    }

    auto packed_lens    = s.lens();
    auto packed_strides = s.strides();

    // Round dimension 1 up to a multiple of 4.
    auto& dim1 = packed_lens[1];
    dim1       = ((dim1 + 3) / 4) * 4;

    packed_strides[0] = packed_strides[1] * dim1;

    return {s.type(), packed_lens, packed_strides};
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/target.cpp
View file @
e7f7ea10
...
@@ -35,13 +35,14 @@
...
@@ -35,13 +35,14 @@
#include <migraphx/fuse_pointwise.hpp>
#include <migraphx/fuse_pointwise.hpp>
#include <migraphx/inline_module.hpp>
#include <migraphx/inline_module.hpp>
#include <migraphx/insert_pad.hpp>
#include <migraphx/insert_pad.hpp>
#include <migraphx/layout_nhwc.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/normalize_ops.hpp>
#include <migraphx/normalize_ops.hpp>
#include <migraphx/optimize.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_gelu.hpp>
#include <migraphx/rewrite_gelu.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_quantization.hpp>
...
@@ -51,6 +52,7 @@
...
@@ -51,6 +52,7 @@
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/compile_miopen.hpp>
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
...
@@ -73,6 +75,7 @@ namespace gpu {
...
@@ -73,6 +75,7 @@ namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NHWC
)
struct
id_pass
struct
id_pass
{
{
...
@@ -112,20 +115,15 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -112,20 +115,15 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
insert_pad
{},
insert_pad
{},
dead_code_elimination
{},
dead_code_elimination
{},
rewrite_batchnorm
{},
dead_code_elimination
{},
rewrite_rnn
{},
rewrite_rnn
{},
dead_code_elimination
{},
dead_code_elimination
{},
inline_module
{},
inline_module
{},
rewrite_pooling
{},
rewrite_pooling
{},
dead_code_elimination
{},
dead_code_elimination
{},
rewrite_gelu
{},
rewrite_gelu
{},
optimize
{},
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_NHWC
{}),
layout_nhwc
{}),
dead_code_elimination
{},
dead_code_elimination
{},
eliminate_common_subexpression
{},
dead_code_elimination
{},
simplify_algebra
{},
simplify_reshapes
{},
simplify_algebra
{},
prefuse_ops
{},
prefuse_ops
{},
dead_code_elimination
{},
dead_code_elimination
{},
auto_contiguous
{},
auto_contiguous
{},
...
@@ -134,6 +132,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -134,6 +132,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
fuse_ck_gemm_softmax_gemm
{
&
ctx
},
fuse_ck_gemm_softmax_gemm
{
&
ctx
},
dead_code_elimination
{},
dead_code_elimination
{},
optimize
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}),
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}),
dead_code_elimination
{},
dead_code_elimination
{},
fuse_mlir
{
&
ctx
},
fuse_mlir
{
&
ctx
},
...
@@ -145,6 +144,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -145,6 +144,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
eliminate_concat
{
concat_gpu_optimization
{}},
eliminate_concat
{
concat_gpu_optimization
{}},
dead_code_elimination
{},
dead_code_elimination
{},
compile_miopen
{
&
gctx
},
dead_code_elimination
{},
pack_int8_args
{},
pack_int8_args
{},
dead_code_elimination
{},
dead_code_elimination
{},
adjust_allocation
{
gpu_allocation_model
{}},
adjust_allocation
{
gpu_allocation_model
{}},
...
@@ -153,6 +154,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -153,6 +154,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
replace_allocate
{
gpu_allocation_model
{},
options
.
offload_copy
},
replace_allocate
{
gpu_allocation_model
{},
options
.
offload_copy
},
dead_code_elimination
{},
dead_code_elimination
{},
adjust_allocation
{
gpu_allocation_model
{}},
dead_code_elimination
{},
compile_ops
{
&
ctx
},
compile_ops
{
&
ctx
},
dead_code_elimination
{},
dead_code_elimination
{},
write_literals
{
&
ctx
},
write_literals
{
&
ctx
},
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment