Unverified commit c45b34c3, authored by Paul Fultz II, committed by GitHub
Browse files

Use MIGRAPHX_GLOBAL (#1918)

Replacing `__global__` with the `MIGRAPHX_GLOBAL` macro also annotates each kernel function with the block size, so the compiler can do a better job of optimizing.
parent 072fd5cc
...@@ -66,7 +66,7 @@ ${preamble} ...@@ -66,7 +66,7 @@ ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
ck_gemm<${solution}, ${blocks_per_batch}>(xs...); ck_gemm<${solution}, ${blocks_per_batch}>(xs...);
......
...@@ -47,7 +47,7 @@ ${preamble} ...@@ -47,7 +47,7 @@ ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) { transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
concat<${axis}>(${concat_args})(${post}, y, xs...); concat<${axis}>(${concat_args})(${post}, y, xs...);
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void gather_kernel(void* in_data, void* in_indices, void* output) MIGRAPHX_GLOBAL void gather_kernel(void* in_data, void* in_indices, void* output)
{ {
make_tensors()(in_data, in_indices, output)([](auto&&... xs) { make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
gather<${axis}>(xs...); gather<${axis}>(xs...);
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void gathernd_kernel(void* in_data, void* in_indices, void* output) MIGRAPHX_GLOBAL void gathernd_kernel(void* in_data, void* in_indices, void* output)
{ {
make_tensors()(in_data, in_indices, output)([](auto&&... xs) { make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
auto settings = make_gathernd_settings(MIGRAPHX_MAKE_CONSTANT(int64_t{BATCH_DIMS})); auto settings = make_gathernd_settings(MIGRAPHX_MAKE_CONSTANT(int64_t{BATCH_DIMS}));
......
...@@ -48,7 +48,7 @@ namespace migraphx { ...@@ -48,7 +48,7 @@ namespace migraphx {
${preamble} ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) { transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
${layernorm}<${axis}>(${post}, ${eps}, xs...); ${layernorm}<${axis}>(${post}, ${eps}, xs...);
......
...@@ -44,7 +44,7 @@ static const char* const pointwise_kernel = R"__migraphx__( ...@@ -44,7 +44,7 @@ static const char* const pointwise_kernel = R"__migraphx__(
namespace migraphx { namespace migraphx {
extern "C" { extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p) MIGRAPHX_GLOBAL void pad_kernel(void* input_p, void* output_p)
{ {
auto offsets = index_ints<${offsets}>{}; auto offsets = index_ints<${offsets}>{};
auto idx = make_index(); auto idx = make_index();
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
${preamble} ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
auto idx = make_index(); auto idx = make_index();
pointwise(idx, ${transformers})(${lambda}, ${args}); pointwise(idx, ${transformers})(${lambda}, ${args});
......
...@@ -45,7 +45,7 @@ namespace migraphx { ...@@ -45,7 +45,7 @@ namespace migraphx {
${preamble} ${preamble}
extern "C" { extern "C" {
__global__ void reduce_kernel(void* input_p, void* output_p) MIGRAPHX_GLOBAL void reduce_kernel(void* input_p, void* output_p)
{ {
transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) { transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) {
......
...@@ -41,7 +41,7 @@ namespace migraphx { ...@@ -41,7 +41,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y) MIGRAPHX_GLOBAL void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y)
{ {
make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) { make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) {
auto settings = make_roalign_settings(MIGRAPHX_MAKE_CONSTANT(float{ROIS_OFFSET}), auto settings = make_roalign_settings(MIGRAPHX_MAKE_CONSTANT(float{ROIS_OFFSET}),
......
...@@ -42,7 +42,7 @@ namespace migraphx { ...@@ -42,7 +42,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void scatternd_kernel(void* in_indices, void* in_updates, void* output) MIGRAPHX_GLOBAL void scatternd_kernel(void* in_indices, void* in_updates, void* output)
{ {
make_tensors()(in_indices, in_updates, output)([](auto&&... xs) { make_tensors()(in_indices, in_updates, output)([](auto&&... xs) {
scatternd(xs..., ${reduction}{}); scatternd(xs..., ${reduction}{});
......
...@@ -45,7 +45,7 @@ static const char* const softmax_kernel = R"__migraphx__( ...@@ -45,7 +45,7 @@ static const char* const softmax_kernel = R"__migraphx__(
namespace migraphx { namespace migraphx {
extern "C" { extern "C" {
__global__ void softmax_kernel(void* input_p, void* output_p) MIGRAPHX_GLOBAL void softmax_kernel(void* input_p, void* output_p)
{ {
transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) { transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) {
softmax<${axis}>(input, output); softmax<${axis}>(input, output);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment