gaoqiong / MIGraphX · Commits

Commit 11e155c2
Merge commit, authored Jun 13, 2022 by Paul
Parents: 8a9c5bce, aa7ff911
Changes: 397

Showing 20 changed files on this page, with 641 additions and 47 deletions (+641 -47).
src/targets/gpu/include/migraphx/gpu/context.hpp                   +9    -0
src/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp    +6    -1
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp       +1    -1
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp                  +1    -1
src/targets/gpu/include/migraphx/gpu/gemm.hpp                      +16   -27
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp                 +4    -2
src/targets/gpu/include/migraphx/gpu/miopen.hpp                    +9    -3
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp           +2    -3
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp               +24   -0
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp         +4    -1
src/targets/gpu/include/migraphx/gpu/scatter.hpp                   +5    -3
src/targets/gpu/include/migraphx/gpu/schedule_model.hpp            +3    -3
src/targets/gpu/include/migraphx/gpu/sync_device.hpp               +1    -1
src/targets/gpu/include/migraphx/gpu/write_literals.hpp            +1    -1
src/targets/gpu/jit/gathernd.cpp                                   +75   -0
src/targets/gpu/jit/pointwise.cpp                                  +126  -0
src/targets/gpu/jit/reduce.cpp                                     +179  -0
src/targets/gpu/jit/roialign.cpp                                   +87   -0
src/targets/gpu/jit/scatternd.cpp                                  +86   -0
src/targets/gpu/kernel.cpp                                         +2    -0
src/targets/gpu/include/migraphx/gpu/context.hpp

@@ -154,6 +154,13 @@ struct hip_device
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }

+    std::size_t get_max_workitems_per_cu() const
+    {
+        return device_props.maxThreadsPerMultiProcessor;
+    }
+
+    std::size_t get_max_workitems_per_block() const { return device_props.maxThreadsPerBlock; }
+
     private:
     std::size_t device_id      = 0;
     std::size_t current_stream = 0;

@@ -235,6 +242,8 @@ struct context
         this->current_device = std::make_shared<hip_device>(0, n_streams);
     }

+    any_ptr get_queue() { return get_stream().get(); }
+
     private:
     // TODO: Make this a vector to support multiple devices
     std::shared_ptr<hip_device> current_device;
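The two new hip_device accessors expose the occupancy limits that the JIT compilers added further down consult when sizing launches. A minimal sketch of how they could be combined (a hypothetical helper, not the actual compute_global_for implementation):

    // Sketch only: a rough upper bound on concurrently resident work-items,
    // built from the accessors added in this hunk.
    inline std::size_t max_resident_workitems(const migraphx::gpu::hip_device& d)
    {
        return d.get_cu_count() * d.get_max_workitems_per_cu();
    }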
src/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp  (mode 100755 → 100644)

@@ -10,7 +10,12 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void prefix_scan_sum(hipStream_t stream, const argument& result, const argument& arg, int32_t axis);
+void prefix_scan_sum(hipStream_t stream,
+                     const argument& result,
+                     const argument& arg,
+                     int32_t axis,
+                     bool exclusive,
+                     bool reverse);

 } // namespace device
 } // namespace gpu
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp

@@ -14,7 +14,7 @@ namespace gpu {
 struct eliminate_workspace
 {
     std::string name() const { return "eliminate_workspace"; }
-    void apply(module& p) const;
+    void apply(module& m) const;
 };

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp

@@ -16,7 +16,7 @@ struct fuse_ops
     context* ctx   = nullptr;
     bool fast_math = true;
     std::string name() const { return "gpu::fuse_ops"; }
-    void apply(module& p) const;
+    void apply(module& m) const;
 };

 } // namespace gpu
src/targets/gpu/include/migraphx/gpu/gemm.hpp

@@ -18,6 +18,8 @@ namespace gpu {
 struct context;

+void blas_shape(const shape& s);
+
 template <class Op>
 struct rocblas_gemm
 {

@@ -25,6 +27,7 @@ struct rocblas_gemm
     float alpha         = 1;
     float beta          = 0;
     bool int8_x4_format = true;
+    bool compute_fp32   = false;
     template <class Self, class F>
     static auto reflect(Self& self, F f)

@@ -49,13 +52,14 @@ struct rocblas_gemm
         std::vector<shape> in_shapes(inputs);
         in_shapes.pop_back();
         check_shapes{in_shapes, *this}.not_broadcasted();
-        batch_not_transposed(inputs[0].strides());
-        batch_not_transposed(inputs[1].strides());
+        blas_shape(inputs[0]);
+        blas_shape(inputs[1]);

         // if gemm and add are fused
-        if(not float_equal(beta, 0))
+        if(in_shapes.size() > 2)
         {
             auto cmat_shape = in_shapes.back();
             in_shapes.pop_back();
+            blas_shape(cmat_shape);
             auto op_out_shape = op.compute_shape(in_shapes);
             if(cmat_shape.lens() != op_out_shape.lens())
             {

@@ -70,6 +74,7 @@ struct rocblas_gemm
                                to_string(cmat_shape.type()) +
                                ", it must be: " + to_string(op_out_shape.type()));
             }
+            return op_out_shape;
         }
         return op.compute_shape(in_shapes);

@@ -80,37 +85,21 @@ struct rocblas_gemm
     {
         if(this->name() == "gpu::gemm")
         {
-            gemm(ctx, output_shape, args, alpha, beta, int8_x4_format);
+            gemm(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32);
         }
         else
         {
-            gemm(ctx, output_shape, args, int32_t(alpha), int32_t(beta), int8_x4_format);
+            gemm(ctx, output_shape, args, int32_t(alpha), int32_t(beta), int8_x4_format, compute_fp32);
         }
         return args.back();
     }

-    void batch_not_transposed(const std::vector<std::size_t>& strides) const
-    {
-        if(strides.size() <= 2)
-            return;
-        auto dim_0       = strides.size() - 2;
-        auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
-        std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
-        if(std::all_of(batch.begin(), batch.end(), [&](auto i) { return (i < matrix_size); }))
-        {
-            MIGRAPHX_THROW("GPU_GEMM: matrix size and batch size {" + to_string_range(strides) +
-                           "} are transposed!");
-        }
-
-        if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
-               return (i < j or i < matrix_size or j < matrix_size);
-           }) != batch.end())
-        {
-            MIGRAPHX_THROW("GPU_GEMM: batch size {" + to_string_range(strides) +
-                           "} is transposed!");
-        }
-    }
-
     std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
     {
         return shapes.size() - 1;
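Note that the fused GEMM + add case is now detected structurally, by how many input shapes remain after popping the output allocation, rather than by testing beta against zero. For illustration only, with made-up dimensions, a fused call would hand compute_shape four shapes:

    // Hypothetical shapes for a fused gemm + add: A, B, the C matrix, and the
    // output allocation that compute_shape pops off before the size check.
    std::vector<migraphx::shape> inputs = {
        {migraphx::shape::float_type, {2, 4, 8}},   // A
        {migraphx::shape::float_type, {2, 8, 16}},  // B
        {migraphx::shape::float_type, {2, 4, 16}},  // C (beta input)
        {migraphx::shape::float_type, {2, 4, 16}}}; // output buffer
    // After in_shapes.pop_back(), three shapes remain, so in_shapes.size() > 2
    // holds and the new `return op_out_shape;` path is taken.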
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp

@@ -14,13 +14,15 @@ void gemm(context& ctx,
           const std::vector<argument>& args,
           float alpha,
           float beta,
-          bool int8_x4_format);
+          bool int8_x4_format,
+          bool compute_fp32);

 void gemm(context& ctx,
           const shape& output_shape,
           const std::vector<argument>& args,
           int32_t alpha,
           int32_t beta,
-          bool int8_x4_format);
+          bool int8_x4_format,
+          bool compute_fp32);

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/miopen.hpp  (mode 100755 → 100644)

@@ -9,6 +9,8 @@
 #include <miopen/miopen.h>
 #include <migraphx/config.hpp>
+#include <sstream>

 #ifdef HAS_FIND_MODE_API
 extern "C" miopenStatus_t miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc,
                                                              int findMode);

@@ -132,12 +134,16 @@ inline convolution_descriptor make_deconv(const T& op)
 inline pooling_descriptor make_pooling(const migraphx::op::pooling& op)
 {
     miopenPoolingMode_t mode;
-    if(op.mode == "max")
+    if(op.mode == op::pooling_mode::max)
         mode = miopenPoolingMax;
-    else if(op.mode == "average")
+    else if(op.mode == op::pooling_mode::average)
         mode = miopenPoolingAverage;
     else
-        MIGRAPHX_THROW("Unknown mode for pooling: " + op.mode);
+    {
+        std::stringstream ss("Unknown mode for pooling: ");
+        ss << op.mode;
+        MIGRAPHX_THROW(ss.str());
+    }
     auto p    = make_obj<pooling_descriptor>(&miopenCreatePoolingDescriptor);
     int kdims = op.kdims();
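The pooling mode is now matched as an enum (op::pooling_mode) instead of a string. A minimal sketch of the same mapping written as a standalone helper, assuming only the max and average enumerators shown in this hunk:

    // Sketch only: mirrors the if/else chain above as a switch. Any other
    // enumerator (if one exists) would fall through to the error path.
    inline miopenPoolingMode_t to_miopen_pooling_mode(migraphx::op::pooling_mode m)
    {
        switch(m)
        {
        case migraphx::op::pooling_mode::max: return miopenPoolingMax;
        case migraphx::op::pooling_mode::average: return miopenPoolingAverage;
        default: MIGRAPHX_THROW("Unknown mode for pooling");
        }
    }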
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp

@@ -40,9 +40,8 @@ struct hip_prefix_scan_sum : oper<hip_prefix_scan_sum>
     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
     {
-        if(op.exclusive or op.reverse)
-            MIGRAPHX_THROW("Exclusive and reverse scan not supported");
-        device::prefix_scan_sum(ctx.get_stream().get(), args[1], args[0], op.axis);
+        device::prefix_scan_sum(
+            ctx.get_stream().get(), args[1], args[0], op.axis, op.exclusive, op.reverse);
         return args[1];
     }
src/targets/gpu/include/migraphx/gpu/compile_roialign.hpp → src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp

-#ifndef MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
-#define MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
+#ifndef MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
+#define MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP

 #include <migraphx/config.hpp>
-#include <migraphx/operation.hpp>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+struct module;
+
 namespace gpu {

-struct context;
-
-operation compile_roialign(context& ctx, const std::vector<shape>& io_shapes, const value& val);
+struct prefuse_ops
+{
+    std::string name() const { return "gpu::prefuse_ops"; }
+    void apply(module& m) const;
+};

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

-#endif // MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
+#endif // MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp

@@ -2,6 +2,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP

 #include <migraphx/shape.hpp>
+#include <migraphx/reflect.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/gpu/miopen.hpp>

@@ -14,6 +15,7 @@ struct context;
 struct miopen_quant_convolution
 {
     op::quant_convolution op;
+    bool int8_x4_format = false;
     shared<convolution_descriptor> cd;
     miopenConvFwdAlgorithm_t algo{};
     miopenHandle_t handle = nullptr;

@@ -22,7 +24,8 @@ struct miopen_quant_convolution
     static auto reflect(Self& self, F f)
     {
         // TODO: Add algo
-        return op::quant_convolution::reflect(self.op, f);
+        return pack_join(migraphx::reflect(self.op, f),
+                         pack(f(self.int8_x4_format, "int8_x4_format")));
     }
     std::string name() const { return "gpu::quant_convolution"; }
src/targets/gpu/include/migraphx/gpu/scatter.hpp

@@ -3,7 +3,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/reflect.hpp>
-#include <migraphx/op/scatter.hpp>
+#include <migraphx/op/scatter_none.hpp>
 #include <migraphx/gpu/miopen.hpp>

 namespace migraphx {

@@ -14,7 +14,9 @@ struct context;
 struct hip_scatter
 {
-    op::scatter op;
+    // scatter_none is an exact replacement for previous op::scatter,
+    // renamed to match an Onnx option. Don't use base class op::scatter
+    op::scatter_none op;
     template <class Self, class F>
     static auto reflect(Self& self, F f)

@@ -22,7 +24,7 @@ struct hip_scatter
         return migraphx::reflect(self.op, f);
     }
-    std::string name() const { return "gpu::scatter"; }
+    std::string name() const { return "gpu::scatter_none"; }
     shape compute_shape(std::vector<shape> inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
src/targets/gpu/include/migraphx/gpu/schedule_model.hpp

@@ -17,9 +17,9 @@ struct schedule_model
 {
     std::size_t streams = 0;
     std::size_t concurrency() const;
-    void sched(module& p, instruction_ref ins, std::size_t n) const;
-    void wait(module& p, instruction_ref ins, std::size_t wait_id) const;
-    void record(module& p, instruction_ref ins, std::size_t wait_id) const;
+    void sched(module& m, instruction_ref ins, std::size_t n) const;
+    void wait(module& m, instruction_ref ins, std::size_t wait_id) const;
+    void record(module& m, instruction_ref ins, std::size_t wait_id) const;
     std::size_t weight(const operation& op) const;
 };
src/targets/gpu/include/migraphx/gpu/sync_device.hpp

@@ -15,7 +15,7 @@ namespace gpu {
 struct sync_device
 {
     std::string name() const { return "sync_device"; }
-    void apply(module& p) const;
+    void apply(module& m) const;
 };

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/write_literals.hpp

@@ -14,7 +14,7 @@ struct write_literals
     context* ctx = nullptr;
     std::string name() const { return "gpu::write_literals"; }
-    void apply(module& p) const;
+    void apply(module& m) const;
 };

 } // namespace gpu
src/targets/gpu/jit/gathernd.cpp  (new file, mode 100644)

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

// NOLINTNEXTLINE
static const char* const gathernd_kernel = R"__migraphx__(
#include <migraphx/kernels/gathernd.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp>

namespace migraphx {

extern "C" {
__global__ void gathernd_kernel(void* in_data, void* in_indices, void* output)
{
    make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
        auto settings = make_gathernd_settings(MIGRAPHX_MAKE_CONSTANT(int64_t{BATCH_DIMS}));
        gathernd(xs..., settings);
    });
}
}

} // namespace migraphx

)__migraphx__";

struct gathernd_compiler : compiler<gathernd_compiler>
{
    std::vector<std::string> names() const { return {"gathernd"}; }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        auto out_s = inputs.back();
        options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
        options.inputs         = inputs;
        options.output         = out_s;
        options.kernel_name    = "gathernd_kernel";
        options.virtual_inputs = inputs;

        // batch_dims
        assert(v.contains("batch_dims"));
        auto batch_dims = v.at("batch_dims").to<int64_t>();
        options.params += " -DBATCH_DIMS=" + std::to_string(batch_dims);

        return compile_hip_code_object(gathernd_kernel, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/jit/pointwise.cpp  (new file, mode 100644)

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>

namespace migraphx {

${preamble}

extern "C" {
__global__ void ${kernel}(${params})
{
    auto idx = make_index();
    pointwise(idx, ${transformers})(${lambda}, ${args});
}
}

} // namespace migraphx

)__migraphx__";

static std::vector<std::string> get_op_names(const module& m)
{
    std::vector<std::string> result;
    for(auto& ins : m)
    {
        if(starts_with(ins.name(), "@"))
            continue;
        result.push_back(ins.name());
    }
    return result;
}

struct pointwise_compiler : compiler<pointwise_compiler>
{
    std::vector<std::string> names() const { return {"pointwise"}; }

    static std::size_t oversubscribe_if(bool b)
    {
        if(b)
            return 256;
        else
            return 1;
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        options.inputs         = inputs;
        options.output         = inputs.back();
        options.virtual_inputs = reduce_dims(inputs);
        options.params         = "-Wno-float-equal";
        auto axis              = find_fast_axis(options.virtual_inputs);
        auto vec               = vectorize::elements(axis, options.virtual_inputs);
        auto preloads          = preload::broadcasts(axis, options.virtual_inputs);
        options.kernel_name    = v.get("kernel", "kernel");
        options.set_launch_params(
            v,
            compute_global_for(ctx,
                               options.output.elements() / vec.size,
                               oversubscribe_if(not preloads.is_preloading())));
        auto src = interpolate_string(
            pointwise_kernel,
            {{"kernel", options.kernel_name},
             {"params", enum_params(inputs.size(), "void * private_p")},
             {"args", enum_params(inputs.size(), "private_p")},
             {"lambda", v.at("lambda").to<std::string>()},
             {"transformers", make_transformer_args(preloads, vec)},
             {"preamble", v.get("preamble", std::string{})}});
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
    {
        assert(not ins->module_inputs().empty());
        auto* pm = ins->module_inputs().front();
        run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
        cpp_generator g;
        g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
        g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
        g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
        g.add_point_op("sign", "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
        g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
        g.add_point_op("less", "migraphx::abs(${0} < ${1})");
        g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
        g.add_point_op("not", "migraphx::abs(not ${0})");
        // Add explict conversions
        g.fresult(
            [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
        auto name = g.create_function(
            g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
        std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
        auto op_names      = get_op_names(*pm);
        op_names.push_back("kernel");
        auto op_name_string = join_strings(op_names, "_");
        return replace(compile_op(
            ctx,
            to_shapes(ins->inputs()),
            {{"lambda", lambda}, {"preamble", g.str()}, {"kernel", op_name_string}}));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
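For orientation, here is roughly what the interpolated source becomes for a hypothetical module that fuses a single add (two inputs plus the output buffer). The kernel name, the private_p parameter spelling produced by enum_params, and the generated device-function name are assumptions for illustration, and ${transformers} is left unexpanded:

    // Hypothetical expansion of pointwise_kernel:
    //   ${kernel}         -> "add_kernel"
    //   ${params}/${args} -> three pointer parameters (spelling assumed)
    //   ${lambda}         -> MIGRAPHX_LIFT(<generated __device__ function>)
    //   ${transformers}   -> output of make_transformer_args(preloads, vec)
    extern "C" __global__ void add_kernel(void* private_p0, void* private_p1, void* private_p2)
    {
        auto idx = make_index();
        pointwise(idx, ${transformers})(MIGRAPHX_LIFT(generated_add),
                                        private_p0, private_p1, private_p2);
    }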
src/targets/gpu/jit/reduce.cpp  (new file, mode 100644)

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

static const char* const simple_reduce_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/reduce.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <args.hpp>

namespace migraphx {

${preamble}

extern "C" {
__global__ void reduce_kernel(void* input_p, void* output_p)
{
    transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) {
        simple_reduce<reduce::${algo}>(${reduction}, ${init}, input, output, ${read}, ${write});
    });
}
}

} // namespace migraphx

)__migraphx__";

static std::size_t get_reduce_elements(const std::vector<shape>& inputs)
{
    return inputs.front().elements() / inputs.back().elements();
}

static std::size_t get_reduce_elements(const std::vector<instruction_ref>& inputs)
{
    return get_reduce_elements(to_shapes(inputs));
}

static std::vector<std::size_t> get_reduce_lens(const std::vector<std::size_t>& input_lens,
                                                const std::vector<std::size_t>& output_lens)
{
    std::vector<std::size_t> reduce_lens;
    std::transform(output_lens.begin(),
                   output_lens.end(),
                   input_lens.begin(),
                   std::back_inserter(reduce_lens),
                   [](auto x, auto y) -> std::size_t {
                       if(x == y)
                           return 1;
                       else
                           return y;
                   });
    return reduce_lens;
}

static std::string get_reduce_algo(const std::vector<shape>& inputs)
{
    auto rlens      = get_reduce_lens(inputs.front().lens(), inputs.back().lens());
    const auto init = std::numeric_limits<std::size_t>::max();
    // The minimum stride
    auto min_stride = std::inner_product(
        rlens.begin(),
        rlens.end(),
        inputs.front().strides().begin(),
        init,
        [](auto x, auto y) { return std::min(x, y); },
        [](auto len, auto stride) { return len == 1 ? init : stride; });
    if(min_stride > 2)
        return "lane";
    return "block";
}

struct reduce_compiler : compiler<reduce_compiler>
{
    std::vector<std::string> names() const
    {
        return {"reduce", "reduce_sum", "reduce_mean", "reduce_max", "reduce_min", "reduce_prod"};
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        options.inputs         = inputs;
        options.output         = inputs.back();
        options.virtual_inputs = reduce_dims(inputs);
        auto faxis             = find_fast_axis({options.virtual_inputs.front()});
        vectorize vec{};
        // Vectorize if the axis is a reduction axis
        if(options.virtual_inputs.back().lens()[faxis] == 1)
        {
            vec = vectorize::elements(faxis, options.virtual_inputs);
        }
        auto relements = get_reduce_elements(options.virtual_inputs) / vec.size;
        auto nelements = options.virtual_inputs.back().elements();
        auto algo      = v.get("algo", get_reduce_algo(options.virtual_inputs));
        if(algo == "block")
        {
            auto block_size = compute_block_size(relements, 256);
            options.set_launch_params(
                v, compute_global_for(ctx, nelements * block_size, 256), block_size);
        }
        else if(algo == "lane")
        {
            options.set_launch_params(v, compute_global_for(ctx, nelements, 256));
        }
        else
        {
            MIGRAPHX_THROW("Unknown reduce algo: " + algo);
        }
        options.kernel_name  = "reduce_kernel";
        std::string identity = "[](auto x) { return x; }";
        auto src             = interpolate_string(simple_reduce_kernel,
                                      {{"reduction", v.at("reduction").to<std::string>()},
                                       {"init", v.get("init", std::string{"0"})},
                                       {"read", v.get("read", identity)},
                                       {"write", v.get("write", identity)},
                                       {"algo", algo},
                                       {"transformers", make_transformer_args(vec)},
                                       {"preamble", v.get("preamble", std::string{})}});
        options.params += "-Wno-float-equal";
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        value v              = value::object{};
        auto reduce_elements = get_reduce_elements(ins->inputs());
        if(op.name() == "reduce_sum")
        {
            v["reduction"] = "op::sum{}";
        }
        else if(op.name() == "reduce_mean")
        {
            v["reduction"] = "op::sum{}";
            v["write"]     = "op::mean{" + std::to_string(reduce_elements) + "}";
        }
        else if(op.name() == "reduce_max")
        {
            v["reduction"] = "op::max{}";
            v["init"]      = "lowest{}";
        }
        else if(op.name() == "reduce_min")
        {
            v["reduction"] = "op::min{}";
            v["init"]      = "highest{}";
        }
        else if(op.name() == "reduce_prod")
        {
            v["reduction"] = "op::product{}";
            v["init"]      = "1";
        }
        else
        {
            MIGRAPHX_THROW("Unsupported reduce");
        }
        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
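As a concrete illustration, for a hypothetical reduce_mean that reduces 64 elements per output and selects the "block" algorithm, the placeholders in simple_reduce_kernel resolve to roughly:

    // Hypothetical substitution: ${algo} = block, ${reduction} = op::sum{},
    // ${init} = 0, ${read} = identity lambda, ${write} = op::mean{64}.
    simple_reduce<reduce::block>(
        op::sum{}, 0, input, output, [](auto x) { return x; }, op::mean{64});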
src/targets/gpu/compile_roialign.cpp → src/targets/gpu/jit/roialign.cpp

-#include <migraphx/gpu/compile_roialign.hpp>
+#include <migraphx/gpu/compiler.hpp>
 #include <migraphx/gpu/compile_hip_code_object.hpp>
-#include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/compile_hip.hpp>
+#include <migraphx/cpp_generator.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/stringutils.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/eliminate_common_subexpression.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/pass_manager.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -13,7 +19,6 @@ namespace gpu {
 // NOLINTNEXTLINE
 static const char* const roialign_kernel = R"__migraphx__(
 #include <migraphx/kernels/roialign.hpp>
-#include <migraphx/kernels/basic_ops.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
 #include <migraphx/kernels/generic_constant.hpp>
 #include <args.hpp>

@@ -37,46 +42,46 @@ __global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y
 } // namespace migraphx

-int main() {}
 )__migraphx__";

-operation compile_roialign(context&, const std::vector<shape>& io_shapes, const value& val)
-{
-    hip_compile_options options;
-    auto out_s     = io_shapes.back();
-    options.local  = 128;
-    options.global = compute_global(out_s.elements(), options.local);
-    options.inputs         = io_shapes;
-    options.output         = out_s;
-    options.kernel_name    = "roialign_kernel";
-    options.virtual_inputs = io_shapes;
-
-    // sampling_ratio
-    assert(val.contains("sampling_ratio"));
-    auto sampling_ratio = val.at("sampling_ratio").to<int64_t>();
-    options.params += " -DSAMPLING_RATIO=" + std::to_string(sampling_ratio);
-
-    // pooling_mode
-    assert(val.contains("mode"));
-    auto mode           = val.at("mode").to<std::string>();
-    bool is_avg_pooling = (mode == "avg");
-    options.params += " -DIS_AVG_POOLING=" + std::to_string(static_cast<int>(is_avg_pooling));
-
-    // coord_trans_mode
-    assert(val.contains("coordinate_transformation_mode"));
-    auto ctm          = val.at("coordinate_transformation_mode").to<std::string>();
-    float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
-    options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
-
-    // spatial_scale
-    assert(val.contains("spatial_scale"));
-    float spatial_scale = val.at("spatial_scale").to<float>();
-    options.params += " -DSPATIAL_SCALE=" + std::to_string(spatial_scale);
-
-    return compile_hip_code_object(roialign_kernel, options);
-}
+struct roialign_compiler : compiler<roialign_compiler>
+{
+    std::vector<std::string> names() const { return {"roialign"}; }
+
+    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    {
+        hip_compile_options options;
+        options.set_launch_params(v, compute_global_for(ctx, inputs.back().elements()), 128);
+        options.output      = inputs.back();
+        options.inputs      = inputs;
+        options.kernel_name = "roialign_kernel";
+
+        // sampling_ratio
+        options.params += " -DSAMPLING_RATIO=" + v.at("sampling_ratio").to<std::string>();
+
+        // pooling_mode
+        auto mode = v.at("mode").to<migraphx::op::pooling_mode>();
+        std::string is_avg_pooling =
+            (mode == migraphx::op::pooling_mode::average) ? "true" : "false";
+        options.params += " -DIS_AVG_POOLING=" + is_avg_pooling;
+
+        // coord_trans_mode
+        auto ctm          = v.at("coordinate_transformation_mode").to<std::string>();
+        float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
+        options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
+
+        // spatial_scale
+        options.params += " -DSPATIAL_SCALE=" + v.at("spatial_scale").to<std::string>();
+
+        return compile_hip_code_object(roialign_kernel, options);
+    }
+
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    {
+        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+    }
+};

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/jit/scatternd.cpp  (new file, mode 100644)

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

// NOLINTNEXTLINE
static const char* const scatternd_kernel = R"__migraphx__(
#include <migraphx/kernels/scatternd.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp>

namespace migraphx {

extern "C" {
__global__ void scatternd_kernel(void* in_indices, void* in_updates, void* output)
{
    make_tensors()(in_indices, in_updates, output)([](auto&&... xs) {
        scatternd(xs..., ${reduction}{});
    });
}
}

} // namespace migraphx

)__migraphx__";

struct scatternd_compiler : compiler<scatternd_compiler>
{
    std::vector<std::string> names() const
    {
        return {"scatternd_none", "scatternd_add", "scatternd_mul"};
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));
        options.inputs         = inputs;
        options.output         = inputs.back();
        options.kernel_name    = "scatternd_kernel";
        options.virtual_inputs = inputs;
        auto reduction         = "assign_" + v.get("reduction", std::string{"none"});
        auto src               = interpolate_string(scatternd_kernel, {{"reduction", reduction}});
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        assert(starts_with(op.name(), "scatternd_"));
        auto reduction = op.name().substr(10);
        return insert(compile_op(ctx,
                                 to_shapes({ins->inputs().begin() + 1, ins->inputs().end()}),
                                 {{"reduction", reduction}}));
    }

    compiler_replace insert(const operation& op) const
    {
        return [=](module& m, instruction_ref ins) {
            auto args = ins->inputs();
            args.back() =
                m.insert_instruction(ins, make_op("hip::copy"), args.front(), args.back());
            args.erase(args.begin());
            return m.replace_instruction(ins, op, args);
        };
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
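The insert() hook rewires the instruction so the kernel can update its output buffer in place: the data input is first copied into the output allocation with hip::copy, and only the indices, updates, and that buffer reach the compiled kernel. Schematically (the instruction spelling below is illustrative, not the exact printed IR):

    // Before: y   = gpu::scatternd_add(data, indices, updates, alloc)
    // After:  buf = hip::copy(data, alloc)
    //         y   = <compiled scatternd_kernel>(indices, updates, buf)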
src/targets/gpu/kernel.cpp

@@ -59,6 +59,8 @@ void launch_kernel(hipFunction_t fun,
                    void* kernargs,
                    std::size_t size)
 {
+    assert(global > 0);
+    assert(local > 0);
     void* config[] = {
 // HIP_LAUNCH_PARAM_* are macros that do horrible things
 #ifdef MIGRAPHX_USE_CLANG_TIDY