Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
dae94657
Unverified
Commit
dae94657
authored
Dec 14, 2022
by
Chris Austen
Committed by
GitHub
Dec 14, 2022
Browse files
Merge branch 'develop' into jit-reduce-reg
parents
b013d991
56c43445
Changes
201
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
605 additions
and
383 deletions
+605
-383
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+282
-16
src/targets/gpu/include/migraphx/gpu/elu.hpp
src/targets/gpu/include/migraphx/gpu/elu.hpp
+0
-64
src/targets/gpu/include/migraphx/gpu/hip.hpp
src/targets/gpu/include/migraphx/gpu/hip.hpp
+3
-3
src/targets/gpu/include/migraphx/gpu/leaky_relu.hpp
src/targets/gpu/include/migraphx/gpu/leaky_relu.hpp
+0
-64
src/targets/gpu/include/migraphx/gpu/mlir.hpp
src/targets/gpu/include/migraphx/gpu/mlir.hpp
+2
-1
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
+1
-1
src/targets/gpu/jit/concat.cpp
src/targets/gpu/jit/concat.cpp
+28
-11
src/targets/gpu/jit/mlir.cpp
src/targets/gpu/jit/mlir.cpp
+1
-2
src/targets/gpu/jit/pad.cpp
src/targets/gpu/jit/pad.cpp
+100
-0
src/targets/gpu/jit/pointwise.cpp
src/targets/gpu/jit/pointwise.cpp
+3
-3
src/targets/gpu/jit/scatternd.cpp
src/targets/gpu/jit/scatternd.cpp
+4
-3
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
+21
-9
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
...argets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
+1
-0
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
+63
-0
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
...argets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
+0
-32
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
+49
-0
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
+32
-0
src/targets/gpu/leaky_relu.cpp
src/targets/gpu/leaky_relu.cpp
+0
-65
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+14
-108
No files found.
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
dae94657
...
@@ -25,18 +25,40 @@
...
@@ -25,18 +25,40 @@
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
inline
shape
reshape_if_1d
(
const
shape
&
input
)
{
shape
new_shape
{
input
};
auto
dims
=
new_shape
.
lens
();
if
(
dims
.
size
()
==
3
)
{
std
::
vector
<
size_t
>
new_dims
=
dims
;
new_dims
.
insert
(
new_dims
.
begin
()
+
2
,
1
);
new_shape
=
shape
{
input
.
type
(),
new_dims
};
}
return
new_shape
;
}
template
<
class
Op
>
struct
miopen_convolution
struct
miopen_convolution
{
{
op
::
convolution
op
;
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
...
@@ -48,29 +70,273 @@ struct miopen_convolution
...
@@ -48,29 +70,273 @@ struct miopen_convolution
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
{
{
return
pack
(
f
(
self
.
op
.
padding
,
"padding"
),
return
pack
(
f
(
self
.
op
,
"op"
),
f
(
self
.
op
.
stride
,
"stride"
),
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
op
.
group
,
"group"
),
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
f
(
self
.
solution_object
,
"solution_object"
),
f
(
self
.
solution_object
,
"solution_object"
),
#endif
#endif
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
f
(
self
.
solution_id
,
"solution_id"
));
}
}
std
::
string
name
()
const
{
return
"gpu::convolution"
;
}
std
::
string
name
()
const
{
return
"gpu::"
+
op
.
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
inline
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
op
}.
has
(
4
);
std
::
vector
<
shape
>
conv_inputs
(
inputs
.
begin
(),
inputs
.
begin
()
+
2
);
check_shapes
{
conv_inputs
,
*
this
}.
max_ndims
(
5
).
packed_layouts
(
{{
0
,
1
,
2
},
{
0
,
1
,
2
,
3
},
{
0
,
2
,
3
,
1
},
{
0
,
1
,
2
,
3
,
4
}});
return
migraphx
::
compute_shape
<
Op
>
(
op
,
conv_inputs
);
}
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
{
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()),
int8_x4_format
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const
miopenTensorArgument_t
tensor_args
[
3
]
=
{
{
miopenTensorConvolutionX
,
nullptr
,
args
[
0
].
implicit
()},
{
miopenTensorConvolutionW
,
nullptr
,
args
[
1
].
implicit
()},
{
miopenTensorConvolutionY
,
nullptr
,
args
[
3
].
implicit
()},
};
if
(
solution_ptr
.
get
()
==
nullptr
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : Load MIOpen Solution before running it"
);
auto
status
=
miopenRunSolution
(
miopen_stream_handle
,
solution_ptr
.
get
(),
3
,
tensor_args
,
args
[
2
].
implicit
(),
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : running convolution using find_2.0 failed"
);
return
args
[
3
];
}
#else
// else use immediate mode
if
(
solution_id
==
0
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : invalid solution ID"
);
auto
status
=
miopenConvolutionForwardImmediate
(
miopen_stream_handle
,
w_desc
.
get
(),
args
[
1
].
implicit
(),
x_desc
.
get
(),
args
[
0
].
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
workspace_size
,
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": running convolution failed"
);
return
args
[
3
];
#endif
}
void
set_conv_descriptor
()
{
cd
=
(
op
.
name
()
==
"deconvolution"
)
?
make_deconv
(
op
)
:
make_conv
(
op
);
}
value
compile
(
migraphx
::
context
&
ctx
,
const
shape
&
output
,
const
std
::
vector
<
shape
>&
input
)
{
set_conv_descriptor
();
auto
ws
=
find
(
any_cast
<
migraphx
::
gpu
::
context
>
(
ctx
),
output
,
input
);
return
{{
"workspace"
,
ws
.
bytes
()}};
}
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
std
::
size_t
workspace_size
=
0
;
#ifdef MIGRAPHX_HAS_FIND_2_API
{
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
&
miopenCreateConvProblem
,
cd
.
get
(),
miopenProblemDirectionForward
);
set_tensor_descriptor
(
miopenTensorConvolutionX
,
x_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionW
,
w_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionY
,
y_desc
,
conv_problem
);
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
solution_ptr
=
find_solution
(
miopen_stream_handle
,
conv_problem
.
get
());
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
std
::
size_t
solution_size
;
status
=
miopenGetSolutionSize
(
solution_ptr
.
get
(),
&
solution_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
": Failed to fetch solution size"
);
auto
solution_binary
=
std
::
vector
<
char
>
{};
solution_binary
.
resize
(
solution_size
);
status
=
miopenSaveSolution
(
solution_ptr
.
get
(),
solution_binary
.
data
());
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
": Saving solution failed"
);
solution_object
=
value
::
binary
{
solution_binary
.
data
(),
solution_size
};
return
shape
{
shape
::
int8_type
,
{
workspace_size
}};
}
#else
auto
status
=
miopenConvolutionForwardGetWorkSpaceSize
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : Failed to get forward workspace size"
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
auto
x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
x_desc
.
get
(),
x
.
implicit
(),
w_desc
.
get
(),
w
.
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
y
.
implicit
(),
1
,
&
algo_count
,
&
perf
,
workspace
.
implicit
(),
workspace_size
,
false
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : find convolution failed"
);
algo
=
perf
.
fwd_algo
;
size_t
solution_count
;
status
=
miopenConvolutionForwardGetSolutionCount
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
solution_count
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": get solution count failed"
);
std
::
vector
<
miopenConvSolution_t
>
solutions
(
solution_count
);
status
=
miopenConvolutionForwardGetSolution
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
solution_count
,
&
solution_count
,
solutions
.
data
());
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": get solution failed"
);
solution_id
=
solutions
.
front
().
solution_id
;
return
shape
{
shape
::
int8_type
,
{
perf
.
memory
}};
#endif
}
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
{
(
void
)(
ctx
);
// avoid warnings
(
void
)(
output_shape
);
(
void
)(
inputs
);
// load solution
if
(
solution_ptr
==
nullptr
)
{
miopenSolution_t
ptr
;
auto
status
=
miopenLoadSolution
(
&
ptr
,
reinterpret_cast
<
const
char
*>
(
solution_object
.
data
()),
solution_object
.
size
());
solution_ptr
=
miopen_solution
{
ptr
};
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": loading convolution solution failed"
);
}
}
#else
// Use immediate mode API
{
set_conv_descriptor
();
if
(
solution_id
==
0
)
{
// Check that workspace hasn't changed
auto
size
=
inputs
.
at
(
2
).
bytes
();
auto
ws
=
find
(
ctx
,
output_shape
,
inputs
);
if
(
ws
.
bytes
()
>
size
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": workspace has changed during finalization."
);
}
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Convolution: compile solution failed"
);
}
#endif
}
inline
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
};
inline
shape
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
return
s
;
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/elu.hpp
deleted
100644 → 0
View file @
b013d991
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_ELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_ELU_HPP
#include <migraphx/op/elu.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_elu
{
op
::
elu
op
;
shared
<
activation_descriptor
>
ad
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::elu"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/hip.hpp
View file @
dae94657
...
@@ -105,7 +105,7 @@ struct hip_copy_to_gpu
...
@@ -105,7 +105,7 @@ struct hip_copy_to_gpu
std
::
string
name
()
const
{
return
"hip::copy_to_gpu"
;
}
std
::
string
name
()
const
{
return
"hip::copy_to_gpu"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
,
2
);
check_shapes
{
inputs
,
*
this
}.
has
(
1
,
2
)
.
same_type
()
;
return
inputs
.
at
(
0
);
return
inputs
.
at
(
0
);
}
}
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
...
@@ -131,7 +131,7 @@ struct hip_copy_from_gpu
...
@@ -131,7 +131,7 @@ struct hip_copy_from_gpu
std
::
string
name
()
const
{
return
"hip::copy_from_gpu"
;
}
std
::
string
name
()
const
{
return
"hip::copy_from_gpu"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
,
2
);
check_shapes
{
inputs
,
*
this
}.
has
(
1
,
2
)
.
same_type
()
;
return
inputs
.
at
(
0
);
return
inputs
.
at
(
0
);
}
}
argument
argument
...
@@ -159,7 +159,7 @@ struct hip_copy
...
@@ -159,7 +159,7 @@ struct hip_copy
std
::
string
name
()
const
{
return
"hip::copy"
;
}
std
::
string
name
()
const
{
return
"hip::copy"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
check_shapes
{
inputs
,
*
this
}.
has
(
2
)
.
same_type
()
;
return
inputs
.
at
(
1
);
return
inputs
.
at
(
1
);
}
}
argument
compute
(
context
&
ctx
,
const
shape
&
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
ctx
,
const
shape
&
,
std
::
vector
<
argument
>
args
)
const
...
...
src/targets/gpu/include/migraphx/gpu/leaky_relu.hpp
deleted
100644 → 0
View file @
b013d991
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_leaky_relu
{
op
::
leaky_relu
op
;
shared
<
activation_descriptor
>
ad
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::leaky_relu"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/mlir.hpp
View file @
dae94657
...
@@ -36,7 +36,8 @@ struct module;
...
@@ -36,7 +36,8 @@ struct module;
namespace
gpu
{
namespace
gpu
{
std
::
string
dump_mlir
(
const
module
&
m
);
std
::
string
dump_mlir
(
const
module
&
m
);
code_object_op
compile_mlir
(
const
context
&
ctx
,
const
module
&
m
);
code_object_op
compile_mlir
(
const
context
&
ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
);
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
ins
,
instruction_ref
ins
,
...
...
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
View file @
dae94657
...
@@ -41,7 +41,7 @@ struct problem_params
...
@@ -41,7 +41,7 @@ struct problem_params
shape
output
;
shape
output
;
};
};
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
);
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
,
bool
xdlops
);
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
View file @
dae94657
...
@@ -25,7 +25,7 @@
...
@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <rocblas.h>
#include <rocblas
/rocblas
.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/jit/concat.cpp
View file @
dae94657
...
@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT
...
@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT
static
const
char
*
const
concat_kernel
=
R"__migraphx__(
static
const
char
*
const
concat_kernel
=
R"__migraphx__(
#include <migraphx/kernels/concat.hpp>
#include <migraphx/kernels/concat.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
#include <args.hpp>
namespace migraphx {
namespace migraphx {
${preamble}
extern "C" {
extern "C" {
__global__ void ${kernel}(${params})
__global__ void ${kernel}(${params})
{
{
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y,
${concat_params},
auto... xs) {
concat<${axis}>(y, xs...);
concat<${axis}>(
${concat_args})(${post},
y, xs...);
});
});
}
}
...
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
...
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
{
// TODO: Use reduce_dims
auto
num_of_concat_inputs
=
v
.
get
(
"concat_inputs"
,
inputs
.
size
()
-
1
);
hip_compile_options
options
;
hip_compile_options
options
;
options
.
inputs
=
inputs
;
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
output
=
inputs
.
back
();
options
.
params
=
"-Wno-float-equal"
;
options
.
params
=
"-Wno-float-equal"
;
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
options
.
set_launch_params
(
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
auto
src
=
interpolate_string
(
concat_kernel
,
auto
src
=
interpolate_string
(
{{
"kernel"
,
options
.
kernel_name
},
concat_kernel
,
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{{
"kernel"
,
options
.
kernel_name
},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"axis"
,
v
.
at
(
"axis"
).
to
<
std
::
string
>
()}});
{
"concat_params"
,
enum_params
(
num_of_concat_inputs
,
"auto concat_x"
)},
{
"concat_args"
,
enum_params
(
num_of_concat_inputs
,
"concat_x"
)},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"axis"
,
v
.
at
(
"axis"
).
to
<
std
::
string
>
()}});
return
compile_hip_code_object
(
src
,
options
);
return
compile_hip_code_object
(
src
,
options
);
}
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
auto
v
=
op
.
to_value
();
if
(
not
ins
->
module_inputs
().
empty
())
{
auto
*
pm
=
ins
->
module_inputs
().
front
();
v
[
"concat_inputs"
]
=
ins
->
inputs
().
size
()
-
pm
->
get_parameter_names
().
size
();
v
[
"preamble"
]
=
generate_pointwise
(
*
pm
,
"post_concat"
);
v
[
"post"
]
=
"MIGRAPHX_LIFT(post_concat)"
;
v
[
"kernel"
]
=
"concat_"
+
generate_name_from_ops
(
*
pm
)
+
"_kernel"
;
}
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
}
}
};
};
...
...
src/targets/gpu/jit/mlir.cpp
View file @
dae94657
...
@@ -24,7 +24,6 @@
...
@@ -24,7 +24,6 @@
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
namespace
migraphx
{
namespace
migraphx
{
...
@@ -41,7 +40,7 @@ struct mlir_compiler : compiler<mlir_compiler>
...
@@ -41,7 +40,7 @@ struct mlir_compiler : compiler<mlir_compiler>
{
{
auto
*
smod
=
ins
->
module_inputs
().
front
();
auto
*
smod
=
ins
->
module_inputs
().
front
();
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
return
insert
(
compile_mlir
(
ctx
,
*
smod
));
return
insert
(
compile_mlir
(
ctx
,
*
smod
,
ins
->
inputs
()
));
}
}
compiler_replace
insert
(
code_object_op
co
)
const
compiler_replace
insert
(
code_object_op
co
)
const
...
...
src/targets/gpu/
batch_norm_inference
.cpp
→
src/targets/gpu/
jit/pad
.cpp
View file @
dae94657
...
@@ -21,65 +21,80 @@
...
@@ -21,65 +21,80 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/gpu/
batch_norm_inference
.hpp>
#include <migraphx/gpu/
compiler
.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/float_equal.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
shape
miopen_batch_norm_inference
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
using
namespace
migraphx
::
gpu
::
gen
;
// NOLINT
static
const
char
*
const
pointwise_kernel
=
R"__migraphx__(
#include <migraphx/kernels/pad.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p)
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
6
);
auto offsets = index_ints<${offsets}>{};
check_shapes
{
inputs
.
data
(),
inputs
.
data
()
+
1
,
*
this
}.
same_ndims
().
max_ndims
(
5
);
auto idx = make_index();
return
op
.
compute_shape
({
inputs
.
at
(
0
),
inputs
.
at
(
1
),
inputs
.
at
(
2
),
inputs
.
at
(
3
),
inputs
.
at
(
4
)});
make_tensors()(input_p, output_p)([&](auto input, auto output) {
pad(idx, offsets, input, output, ${pad_val});
});
}
}
}
inline
shape
reshape_to_2d
(
const
shape
&
input
)
} // namespace migraphx
{
auto
dims
=
input
.
lens
();
if
(
dims
.
size
()
>=
4
)
return
input
;
std
::
vector
<
size_t
>
new_dims
(
dims
.
begin
(),
dims
.
end
());
)__migraphx__"
;
std
::
size_t
num
=
4
-
dims
.
size
();
new_dims
.
insert
(
new_dims
.
end
(),
num
,
1
);
return
{
input
.
type
(),
new_dims
};
}
argument
miopen_batch_norm_inference
::
compute
(
context
&
ctx
,
struct
pad_compiler
:
compiler
<
pad_compiler
>
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
{
shape
x_shape
=
args
[
0
].
get_shape
();
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pad"
};
}
shape
y_shape
=
output_shape
;
shape
bn_shape
=
args
[
3
].
get_shape
();
auto
x_desc
=
make_tensor
(
reshape_to_2d
(
x_shape
));
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
auto
y_desc
=
make_tensor
(
reshape_to_2d
(
y_shape
));
{
auto
bn_desc
=
make_tensor
(
reshape_to_2d
(
bn_shape
));
hip_compile_options
options
;
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
virtual_inputs
=
reduce_dims
(
inputs
);
options
.
kernel_name
=
"pad_kernel"
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
inputs
.
at
(
1
).
elements
()));
float
alpha
=
1.0
;
auto
pad_val
=
v
.
get
(
"value"
,
0.
f
);
float
beta
=
0.0
f
;
auto
pad_val_string
=
to_string
(
pad_val
);
if
(
float_equal
(
pad_val
,
std
::
numeric_limits
<
float
>::
lowest
()))
pad_val_string
=
"lowest{}"
;
if
(
float_equal
(
pad_val
,
std
::
numeric_limits
<
float
>::
max
()))
pad_val_string
=
"highest{}"
;
miopenBatchNormalizationForwardInference
(
ctx
.
get_stream
().
get_miopen
(),
auto
padding
=
v
.
at
(
"pads"
).
to_vector
<
int64_t
>
();
miopenBatchNormMode_t
(
op
.
bn_mode
),
auto
input_lens
=
inputs
.
front
().
lens
();
&
alpha
,
std
::
vector
<
size_t
>
offsets
(
input_lens
.
size
());
&
beta
,
std
::
copy
(
padding
.
begin
(),
padding
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
x_desc
.
get
(),
args
[
0
].
implicit
(),
y_desc
.
get
(),
args
[
5
].
implicit
(),
bn_desc
.
get
(),
args
[
1
].
implicit
(),
args
[
2
].
implicit
(),
args
[
3
].
implicit
(),
args
[
4
].
implicit
(),
op
.
epsilon
);
return
args
[
5
];
auto
src
=
interpolate_string
(
}
pointwise_kernel
,
{{
"pad_val"
,
to_string
(
pad_val_string
)},
{
"offsets"
,
to_string_range
(
offsets
)}});
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
}
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/gpu/jit/pointwise.cpp
View file @
dae94657
...
@@ -58,7 +58,7 @@ __global__ void ${kernel}(${params})
...
@@ -58,7 +58,7 @@ __global__ void ${kernel}(${params})
struct
pointwise_compiler
:
compiler
<
pointwise_compiler
>
struct
pointwise_compiler
:
compiler
<
pointwise_compiler
>
{
{
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pointwise"
,
"contiguous"
};
}
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pointwise"
,
"contiguous"
,
"layout"
};
}
static
std
::
size_t
oversubscribe_if
(
bool
b
)
static
std
::
size_t
oversubscribe_if
(
bool
b
)
{
{
...
@@ -91,12 +91,12 @@ struct pointwise_compiler : compiler<pointwise_compiler>
...
@@ -91,12 +91,12 @@ struct pointwise_compiler : compiler<pointwise_compiler>
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
{
if
(
op
.
name
()
==
"contiguous"
)
if
(
contains
({
"layout"
,
"contiguous"
},
op
.
name
())
)
{
{
return
replace
(
compile_op
(
return
replace
(
compile_op
(
ctx
,
ctx
,
to_shapes
(
ins
->
inputs
()),
to_shapes
(
ins
->
inputs
()),
{{
"lambda"
,
"[](auto x) { return x; }"
},
{
"kernel"
,
"contiguous
_kernel"
}}));
{{
"lambda"
,
"[](auto x) { return x; }"
},
{
"kernel"
,
op
.
name
()
+
"
_kernel"
}}));
}
}
else
else
{
{
...
...
src/targets/gpu/jit/scatternd.cpp
View file @
dae94657
...
@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
...
@@ -79,9 +79,10 @@ struct scatternd_compiler : compiler<scatternd_compiler>
{
{
assert
(
starts_with
(
op
.
name
(),
"scatternd_"
));
assert
(
starts_with
(
op
.
name
(),
"scatternd_"
));
auto
reduction
=
op
.
name
().
substr
(
10
);
auto
reduction
=
op
.
name
().
substr
(
10
);
return
insert
(
compile_op
(
ctx
,
return
insert
(
compile_op
(
to_shapes
({
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
()}),
ctx
,
{{
"reduction"
,
reduction
}}));
to_shapes
(
std
::
vector
<
instruction_ref
>
{
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
()}),
{{
"reduction"
,
reduction
}}));
}
}
compiler_replace
insert
(
const
operation
&
op
)
const
compiler_replace
insert
(
const
operation
&
op
)
const
...
...
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
View file @
dae94657
...
@@ -41,7 +41,15 @@ constexpr auto concat_slice(Output out, Input, Start)
...
@@ -41,7 +41,15 @@ constexpr auto concat_slice(Output out, Input, Start)
return
Start
{}
*
output_shape
.
strides
[
Axis
];
return
Start
{}
*
output_shape
.
strides
[
Axis
];
});
});
constexpr
auto
s
=
make_shape
(
lens
,
strides
);
constexpr
auto
s
=
make_shape
(
lens
,
strides
);
return
make_tensor_view
(
&
out
[
offset
],
s
);
MIGRAPHX_ASSERT
(
offset
<
out
.
get_shape
().
element_space
());
MIGRAPHX_ASSERT
((
s
.
element_space
()
+
offset
)
<=
out
.
get_shape
().
element_space
());
return
make_tensor_view
(
out
.
data
()
+
offset
,
s
);
}
template
<
index_int
Axis
,
class
Input
,
class
Start
,
class
...
Ts
>
constexpr
auto
concat_slices
(
Input
input
,
Start
start
,
Ts
...
xs
)
{
return
[
=
](
auto
f
)
{
f
(
concat_slice
<
Axis
>
(
xs
,
input
,
start
)...);
};
}
}
template
<
index_int
Axis
,
class
Input
>
template
<
index_int
Axis
,
class
Input
>
...
@@ -51,15 +59,19 @@ constexpr auto concat_ends(Input)
...
@@ -51,15 +59,19 @@ constexpr auto concat_ends(Input)
return
_c
<
lens
[
Axis
]
>
;
return
_c
<
lens
[
Axis
]
>
;
}
}
template
<
index_int
Axis
,
class
Output
,
class
...
Inputs
>
template
<
index_int
Axis
,
class
...
Inputs
>
__device__
void
concat
(
Output
output
,
Inputs
...
inputs
)
__device__
auto
concat
(
Inputs
...
inputs
)
{
{
auto
idx
=
make_index
();
return
[
=
](
auto
f
,
auto
...
ts
)
{
fold
([
&
](
auto
start
,
auto
input
)
{
auto
idx
=
make_index
();
auto
y
=
concat_slice
<
Axis
>
(
output
,
input
,
start
);
fold
([
&
](
auto
start
,
auto
input
)
{
idx
.
global_stride
(
input
.
get_shape
().
elements
(),
[
&
](
auto
i
)
{
y
[
i
]
=
input
[
i
];
});
concat_slices
<
Axis
>
(
input
,
start
,
ts
...)([
&
](
auto
y
,
auto
...
xs
)
{
return
start
+
concat_ends
<
Axis
>
(
input
);
idx
.
global_stride
(
input
.
get_shape
().
elements
(),
})(
_c
<
0
>
,
inputs
...);
[
&
](
auto
i
)
{
y
[
i
]
=
f
(
input
[
i
],
xs
[
i
]...);
});
});
return
start
+
concat_ends
<
Axis
>
(
input
);
})(
_c
<
0
>
,
inputs
...);
};
}
}
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
View file @
dae94657
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#include <migraphx/kernels/reduce.hpp>
#include <migraphx/kernels/reduce.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/print.hpp>
#include <migraphx/kernels/print.hpp>
namespace
migraphx
{
namespace
migraphx
{
...
...
src/targets/gpu/include/migraphx/
gpu/quant_convolution
.hpp
→
src/targets/gpu/
kernels/
include/migraphx/
kernels/pad
.hpp
View file @
dae94657
...
@@ -21,53 +21,43 @@
...
@@ -21,53 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_QUANT_CONVOLUTION
_HPP
#ifndef MIGRAPHX_GUARD_
KERNELS_PAD
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_QUANT_CONVOLUTION
_HPP
#define MIGRAPHX_GUARD_
KERNELS_PAD
_HPP
#include <migraphx/shape.hpp>
#include <migraphx/
kernels/
shape.hpp>
#include <migraphx/
reflect
.hpp>
#include <migraphx/
kernels/index
.hpp>
#include <migraphx/
op/quant_convolution
.hpp>
#include <migraphx/
kernels/algorithm
.hpp>
#include <migraphx/
gpu/miopen
.hpp>
#include <migraphx/
kernels/ranges
.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
template
<
class
Offsets
,
class
Input
,
class
Output
,
class
PadVal
>
__device__
void
pad
(
const
index
&
idx
,
struct
miopen_quant_convolution
const
Offsets
&
offsets
,
const
Input
&
input
,
Output
&
output
,
const
PadVal
&
pad_val
)
{
{
op
::
quant_convolution
op
;
auto
output_shape
=
output
.
get_shape
();
bool
int8_x4_format
=
false
;
idx
.
global_stride
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
shared
<
convolution_descriptor
>
cd
;
// 1. get current multi-index for output
miopenConvFwdAlgorithm_t
algo
{};
// 2. get the size of the input to determine input boundaries
uint64_t
solution_id
=
0
;
// 3. compute the corresponding multi-index for input by accounting for offsets
// 4. if current multi-index is within offsets or input's new multi-index is out of bounds,
template
<
class
Self
,
class
F
>
// use pad value instead of input's value
static
auto
reflect
(
Self
&
self
,
F
f
)
auto
multi
=
output_shape
.
multi
(
i
);
{
auto
input_bounds
=
input
.
get_shape
().
lens
;
// TODO: Add algo
auto
input_idx
=
multi
-
offsets
;
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
auto
range_multi
=
range
(
multi
.
size
());
pack
(
f
(
self
.
int8_x4_format
,
"int8_x4_format"
)));
}
if
(
any_of
(
range_multi
.
begin
(),
range_multi
.
end
(),
[
&
](
auto
j
)
{
return
multi
[
j
]
<
offsets
[
j
]
or
input_idx
[
j
]
>=
input_bounds
[
j
];
std
::
string
name
()
const
{
return
"gpu::quant_convolution"
;
}
}))
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
output
[
multi
]
=
pad_val
;
argument
else
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
output
[
multi
]
=
input
[
input_idx
];
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
});
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
private:
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
#endif
#endif
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
View file @
dae94657
...
@@ -33,38 +33,6 @@
...
@@ -33,38 +33,6 @@
namespace
migraphx
{
namespace
migraphx
{
template
<
class
T
>
struct
implicit_conversion_op
{
T
x
;
template
<
index_int
N
,
class
U
>
constexpr
operator
vec
<
U
,
N
>
()
const
{
if
constexpr
(
vec_size
<
T
>
()
==
0
)
{
return
x
;
}
else
{
static_assert
(
vec_size
<
T
>
()
==
N
,
"Vector mismatch size"
);
return
__builtin_convertvector
(
x
,
vec
<
U
,
N
>
);
}
}
template
<
class
U
>
constexpr
operator
U
()
const
{
return
x
;
}
};
template
<
class
T
>
constexpr
implicit_conversion_op
<
T
>
implicit_conversion
(
T
x
)
{
return
{
x
};
}
template
<
class
F
,
class
T
,
class
...
Ts
>
template
<
class
F
,
class
T
,
class
...
Ts
>
__device__
void
pointwise_tensor
(
index
idx
,
F
f
,
T
out
,
Ts
...
xs
)
__device__
void
pointwise_tensor
(
index
idx
,
F
f
,
T
out
,
Ts
...
xs
)
{
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
0 → 100644
View file @
dae94657
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#define MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#include <migraphx/kernels/iota_iterator.hpp>
namespace
migraphx
{
template
<
class
Iterator
>
struct
iterator_range
{
Iterator
start
;
Iterator
last
;
constexpr
Iterator
begin
()
const
{
return
start
;
}
constexpr
Iterator
end
()
const
{
return
last
;
}
};
constexpr
iterator_range
<
iota_iterator
>
range
(
diff_int
start
,
diff_int
last
)
{
return
{{
start
,
{}},
{
last
,
{}}};
}
constexpr
iterator_range
<
iota_iterator
>
range
(
diff_int
last
)
{
return
range
(
0
,
last
);
}
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
View file @
dae94657
...
@@ -185,5 +185,37 @@ constexpr auto vec_reduce(T x, Op op)
...
@@ -185,5 +185,37 @@ constexpr auto vec_reduce(T x, Op op)
}
}
}
}
template
<
class
T
>
struct
implicit_conversion_op
{
T
x
;
template
<
index_int
N
,
class
U
>
constexpr
operator
vec
<
U
,
N
>
()
const
{
if
constexpr
(
vec_size
<
T
>
()
==
0
)
{
return
x
;
}
else
{
static_assert
(
vec_size
<
T
>
()
==
N
,
"Vector mismatch size"
);
return
__builtin_convertvector
(
x
,
vec
<
U
,
N
>
);
}
}
template
<
class
U
>
constexpr
operator
U
()
const
{
return
x
;
}
};
template
<
class
T
>
constexpr
implicit_conversion_op
<
T
>
implicit_conversion
(
T
x
)
{
return
{
x
};
}
}
// namespace migraphx
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
#endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
src/targets/gpu/leaky_relu.cpp
deleted
100644 → 0
View file @
b013d991
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_leaky_relu
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
not_broadcasted
();
return
inputs
.
at
(
1
);
}
argument
miopen_leaky_relu
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
float
alpha
=
1
;
float
beta
=
0
;
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
y_desc
=
make_tensor
(
output_shape
);
miopenActivationForward
(
ctx
.
get_stream
().
get_miopen
(),
ad
.
get
(),
&
alpha
,
x_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
y_desc
.
get
(),
args
[
1
].
implicit
());
return
args
[
1
];
}
void
miopen_leaky_relu
::
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
ad
=
make_leaky_relu
(
op
.
alpha
);
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/lowering.cpp
View file @
dae94657
...
@@ -29,23 +29,15 @@
...
@@ -29,23 +29,15 @@
#include <migraphx/instruction_ref.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
...
@@ -98,14 +90,11 @@ struct miopen_apply
...
@@ -98,14 +90,11 @@ struct miopen_apply
add_extend_op
(
"argmax"
);
add_extend_op
(
"argmax"
);
add_extend_op
(
"argmin"
);
add_extend_op
(
"argmin"
);
add_extend_op
(
"elu"
);
add_extend_op
(
"gather"
);
add_extend_op
(
"gather"
);
add_extend_op
(
"leaky_relu"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"multinomial"
);
add_extend_op
(
"multinomial"
);
add_extend_op
(
"nonzero"
);
add_extend_op
(
"nonzero"
);
add_extend_op
(
"pad"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"prefix_scan_sum"
);
add_extend_op
(
"prefix_scan_sum"
);
add_extend_op
(
"reverse"
);
add_extend_op
(
"reverse"
);
...
@@ -115,16 +104,15 @@ struct miopen_apply
...
@@ -115,16 +104,15 @@ struct miopen_apply
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"topk"
);
add_extend_op
(
"topk"
);
add_
batch_norm_inference_op
(
);
add_
convolution_op
(
"convolution"
);
add_convolution_op
();
add_convolution_op
(
"deconvolution"
);
add_
de
convolution_op
();
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_if_op
();
add_if_op
();
add_loop_op
();
add_loop_op
();
add_neg_op
();
add_neg_op
();
add_nms_op
();
add_nms_op
();
add_quant_convolution_op
();
}
}
void
copy_params
()
const
void
copy_params
()
const
...
@@ -232,38 +220,6 @@ struct miopen_apply
...
@@ -232,38 +220,6 @@ struct miopen_apply
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
}
void
add_convolution_op
()
{
apply_map
.
emplace
(
"convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
void
add_deconvolution_op
()
{
apply_map
.
emplace
(
"deconvolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
deconvolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_deconvolution
{
op
,
make_deconv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
template
<
typename
Op
>
template
<
typename
Op
>
void
add_gemm_op
(
const
std
::
string
&
name
)
void
add_gemm_op
(
const
std
::
string
&
name
)
{
{
...
@@ -277,32 +233,19 @@ struct miopen_apply
...
@@ -277,32 +233,19 @@ struct miopen_apply
});
});
}
}
void
add_
quant_
convolution_op
()
void
add_convolution_op
(
const
std
::
string
&
name
)
{
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
operation
conv
=
make_op
(
shape
ws
;
"gpu::"
+
name
,
miopen_quant_convolution
conv
;
{{
"op"
,
ins
->
get_operator
().
to_value
()},
{
"int8_x4_format"
,
int8_x4_format
}});
auto
compile_quant_conv_with_format
=
[
&
](
bool
format
)
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
conv
=
miopen_quant_convolution
{
op
,
format
,
make_conv
(
op
)};
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
};
try
{
compile_quant_conv_with_format
(
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
compile_quant_conv_with_format
(
not
int8_x4_format
);
}
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
return
mod
->
replace_instruction
(
ins
,
make_op
(
"gpu::miopen_op"
,
{{
"op"
,
to_value
(
conv
)}}),
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
output
);
});
});
}
}
...
@@ -336,43 +279,6 @@ struct miopen_apply
...
@@ -336,43 +279,6 @@ struct miopen_apply
});
});
}
}
void
add_batch_norm_inference_op
()
{
apply_map
.
emplace
(
"batch_norm_inference"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
batch_norm_inference
>
(
ins
->
get_operator
());
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
shape
old_shape
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
auto
input
=
ins
->
inputs
()[
0
];
auto
input_lens
=
input
->
get_shape
().
lens
();
std
::
vector
<
int64_t
>
rsp_lens
(
input_lens
.
size
(),
1
);
// for per_activation case, also need to reshape input
if
(
op
.
bn_mode
==
op
::
batch_norm_inference
::
per_activation
)
{
std
::
copy
(
input_lens
.
begin
()
+
1
,
input_lens
.
end
(),
rsp_lens
.
begin
()
+
1
);
}
else
{
rsp_lens
[
1
]
=
static_cast
<
int64_t
>
(
old_shape
.
elements
());
}
auto
reshape_op
=
op
::
reshape
{
rsp_lens
};
std
::
vector
<
instruction_ref
>
reshapes
;
std
::
transform
(
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
(),
std
::
back_inserter
(
reshapes
),
[
&
](
auto
i
)
{
return
mod
->
insert_instruction
(
ins
,
reshape_op
,
i
);
});
return
mod
->
replace_instruction
(
ins
,
miopen_batch_norm_inference
{
op
},
input
,
reshapes
[
0
],
reshapes
[
1
],
reshapes
[
2
],
reshapes
[
3
],
output
);
});
}
// use 0 - input to represent neg
// use 0 - input to represent neg
void
add_neg_op
()
void
add_neg_op
()
{
{
...
...
Prev
1
2
3
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment