gaoqiong / MIGraphX · Commits · f06f6aa3

Commit f06f6aa3, authored Jul 09, 2019 by Shucai Xiao

    merge changes from develop branch

Parents: 80a35596, ebfe9735

Changes: 82 files in this commit; showing 20 changed files with 639 additions and 169 deletions (+639 −169), page 1 of 5.
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp (+170 −0)
src/targets/gpu/device/logsoftmax.cpp (+32 −52)
src/targets/gpu/device/pad.cpp (+15 −22)
src/targets/gpu/device/reduce_sum.cpp (+17 −0)
src/targets/gpu/device/softmax.cpp (+32 −56)
src/targets/gpu/fuse_ops.cpp (+39 −19)
src/targets/gpu/gather.cpp (+2 −4)
src/targets/gpu/include/migraphx/gpu/argmax.hpp (+37 −0)
src/targets/gpu/include/migraphx/gpu/argmin.hpp (+37 −0)
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp (+114 −0)
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp (+20 −0)
src/targets/gpu/include/migraphx/gpu/device/argmin.hpp (+20 −0)
src/targets/gpu/include/migraphx/gpu/device/erf.hpp (+20 −0)
src/targets/gpu/include/migraphx/gpu/device/gather.hpp (+1 −4)
src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp (+1 −4)
src/targets/gpu/include/migraphx/gpu/device/reduce_sum.hpp (+20 −0)
src/targets/gpu/include/migraphx/gpu/device/softmax.hpp (+1 −4)
src/targets/gpu/include/migraphx/gpu/erf.hpp (+19 −0)
src/targets/gpu/include/migraphx/gpu/reduce_sum.hpp (+38 −0)
src/targets/gpu/logsoftmax.cpp (+4 −4)
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP

#include <migraphx/gpu/device/tensor_view.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

template <class F>
void visit_tensor_size(std::size_t n, F f)
{
    switch(n)
    {
    case 1:
    {
        f(std::integral_constant<std::size_t, 1>{});
        break;
    }
    case 2:
    {
        f(std::integral_constant<std::size_t, 2>{});
        break;
    }
    case 3:
    {
        f(std::integral_constant<std::size_t, 3>{});
        break;
    }
    case 4:
    {
        f(std::integral_constant<std::size_t, 4>{});
        break;
    }
    case 5:
    {
        f(std::integral_constant<std::size_t, 5>{});
        break;
    }
    default: throw std::runtime_error("Unknown tensor size");
    }
}

inline shape get_shape(const shape& x) { return x; }

template <class T>
auto get_shape(const T& x) -> decltype(x.get_shape())
{
    return x.get_shape();
}

template <class V, class F, class... Ts>
void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... xs)
{
    std::initializer_list<migraphx::shape::type_t> types = {get_shape(xs).type()...};
    if(!std::all_of(types.begin(), types.end(), [&](migraphx::shape::type_t t) { return t == s.type(); }))
        MIGRAPHX_THROW("Types must be the same");
    std::initializer_list<std::size_t> ranks = {get_shape(xs).lens().size()...};
    if(!std::all_of(ranks.begin(), ranks.end(), [&](std::size_t r) { return r == s.lens().size(); }))
        MIGRAPHX_THROW("Ranks must be the same");
    visit_tensor_size(s.lens().size(), [&](auto ndim) {
        s.visit_type([&](auto as) { v(f(xs, ndim, as)...); });
    });
}

template <class V, class F, class... Ts>
void hip_visit_views_impl(const shape& s, F f, V&& v, Ts&&... xs)
{
    std::initializer_list<std::size_t> ranks = {get_shape(xs).lens().size()...};
    if(!std::all_of(ranks.begin(), ranks.end(), [&](std::size_t r) { return r == s.lens().size(); }))
        MIGRAPHX_THROW("Ranks must be the same");
    visit_tensor_size(s.lens().size(), [&](auto ndim) { v(f(xs, ndim)...); });
}

template <class F>
struct hip_convert
{
    F f;

    template <class RawData, class N, class As>
    auto operator()(RawData x, N ndim, As as) const
        -> decltype(make_hip_view<ndim>(x.get_shape(), f(as.from(x.data()))))
    {
        return make_hip_view<ndim>(x.get_shape(), f(as.from(x.data())));
    }

    template <class N, class As>
    auto operator()(const shape& s, N ndim, As) const
    {
        return make_hip_shape<ndim>(s);
    }
};

template <class F>
hip_convert<F> make_hip_convert(F f)
{
    return {f};
}

template <class F>
struct hip_convert_view
{
    F f;

    template <class T, class N>
    auto operator()(tensor_view<T> x, N ndim) const
    {
        return make_hip_view<ndim>(f(x));
    }

    template <class N>
    auto operator()(const shape& s, N ndim) const
    {
        return make_hip_shape<ndim>(s);
    }
};

template <class F>
hip_convert_view<F> make_hip_convert_view(F f)
{
    return {f};
}

template <class T, class... Ts>
auto hip_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        hip_visit_all_impl(
            get_shape(x), make_hip_convert([](auto* p) { return device_cast(p); }), f, x, xs...);
    };
}

template <std::size_t N, class T, class... Ts>
auto hip_vec_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        hip_visit_all_impl(
            get_shape(x),
            make_hip_convert([](auto* p) { return as_vec<N>(device_cast(p)); }),
            f,
            x,
            xs...);
    };
}

template <class T, class... Ts>
auto hip_pointer_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        visit_all(x, xs...)([&](auto... vs) { f(device_cast(vs.data())...); });
    };
}

template <class T, class... Ts>
auto hip_visit_views(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        hip_visit_views_impl(
            get_shape(x), make_hip_convert_view([](auto v) { return device_cast(v); }), f, x, xs...);
    };
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
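Note: this header gives device code a compile-time rank. visit_tensor_size dispatches a runtime rank (1 to 5) to a std::integral_constant, and hip_visit_all first checks that every argument matches the leading shape's type and rank, then hands the visitor HIP-side views. As a rough orientation, an elementwise copy written against this API might look like the hypothetical sketch below (not part of the commit; it assumes the same device headers that define argument, gs_launch, and the view types):

// Hypothetical usage sketch of hip_visit_all (illustration only).
void copy_example(hipStream_t stream, const argument& result, const argument& arg)
{
    // Both arguments are visited together; the lambda receives device views
    // whose rank is the compile-time constant chosen by visit_tensor_size.
    hip_visit_all(result, arg)([&](auto output, auto input) {
        gs_launch(stream, result.get_shape().elements())([=](auto i) {
            // Convert the linear work-item id into a fixed-rank multi-index.
            auto idx    = output.get_shape().multi(i);
            output[idx] = input.data()[i];
        });
    });
}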
src/targets/gpu/device/logsoftmax.cpp

 #include <migraphx/shape.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/gpu/device/logsoftmax.hpp>
+#include <migraphx/gpu/device/reduce.hpp>
 #include <migraphx/gpu/device/tensor.hpp>
 #include <migraphx/gpu/device/launch.hpp>
 #include <migraphx/gpu/device/types.hpp>
...
@@ -11,66 +12,45 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument logsoftmax(hipStream_t stream,
-                    const migraphx::shape& output_shape,
-                    std::vector<migraphx::argument> args,
-                    int axis)
-{
-    auto lens         = output_shape.lens();
-    auto num_in_batch = lens[axis];
-    auto batch_lens   = lens;
-    batch_lens[axis]  = 1;
-    migraphx::shape batch_shape{output_shape.type(), batch_lens};
-
-    visit_all(args.back(), args.front())([&](auto output, auto input) {
-        const auto* input_ptr = device_cast(input.data());
-        auto* output_ptr      = device_cast(output.data());
-        visit_tensor_size(batch_shape.lens().size(), [&](auto n_dim) {
-            hip_tensor_descriptor<n_dim> desc_batch(batch_shape);
-            hip_tensor_descriptor<n_dim> desc_data(output_shape);
-            // each thread is for one item in the batch
-            gs_launch(stream, batch_shape.elements())([=](auto i) {
-                auto batch_idx = desc_batch.multi(i);
-                auto data_idx  = batch_idx;
-                // get max
-                auto batch_max = input_ptr[desc_data.linear(batch_idx)];
-                for(std::size_t j = 1; j < num_in_batch; ++j)
-                {
-                    data_idx[axis] = j;
-                    size_t idx     = desc_data.linear(data_idx);
-                    batch_max = std::max(to_hip_type(batch_max), to_hip_type(input_ptr[idx]));
-                }
-                for(std::size_t j = 0; j < num_in_batch; ++j)
-                {
-                    data_idx[axis] = j;
-                    size_t idx      = desc_data.linear(data_idx);
-                    output_ptr[idx] = input_ptr[idx] - batch_max;
-                }
-                auto batch_sum = ::exp(to_hip_type(output_ptr[desc_data.linear(batch_idx)]));
-                for(std::size_t j = 1; j < num_in_batch; ++j)
-                {
-                    data_idx[axis] = j;
-                    size_t idx = desc_data.linear(data_idx);
-                    batch_sum += ::exp(to_hip_type(output_ptr[idx]));
-                }
-                batch_sum = ::log(to_hip_type(batch_sum));
-                for(std::size_t j = 0; j < num_in_batch; ++j)
-                {
-                    data_idx[axis] = j;
-                    size_t idx = desc_data.linear(data_idx);
-                    output_ptr[idx] -= batch_sum;
-                }
-            });
-        });
-    });
-    return args.back();
-}
+void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
+{
+    auto lens                  = result.get_shape().lens();
+    auto batch_lens            = lens;
+    std::size_t batch_item_num = lens[axis];
+    batch_lens[axis]           = 1;
+    migraphx::shape batch_shape{result.get_shape().type(), batch_lens};
+
+    hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
+        const std::size_t max_block_size = 256;
+        const std::size_t block_size     = compute_block_size(batch_item_num, max_block_size);
+        gs_launch(stream, batch_shape.elements() * block_size, block_size)(
+            [=](auto i, auto idx) __device__ {
+                auto data_idx = batch.multi(i / block_size);
+                using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
+                type init  = lowest();
+
+                auto batch_max = block_reduce<max_block_size>(
+                    idx, max{}, init, batch_item_num, [&](auto j) __device__ {
+                        data_idx[axis] = j;
+                        return input[data_idx];
+                    });
+
+                auto batch_sum = block_reduce<max_block_size>(
+                    idx, sum{}, 0, batch_item_num, [&](auto j) __device__ {
+                        data_idx[axis] = j;
+                        auto val       = input[data_idx] - batch_max;
+                        return ::exp(to_hip_type(val));
+                    });
+
+                auto log_batch_sum = ::log(to_hip_type(batch_sum)) + batch_max;
+                idx.local_stride(batch_item_num, [&](auto j) {
+                    data_idx[axis]   = j;
+                    output[data_idx] = input[data_idx] - log_batch_sum;
+                });
+            });
+    });
+}

 } // namespace device
...
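Note: the rewrite replaces the old per-thread serial loops with two block_reduce passes per batch (a max, then a sum of shifted exponentials) and folds the final writes into one identity: log_softmax(x)_j = x_j − (m + log Σ_k exp(x_k − m)) with m = max_k x_k, which is the log_batch_sum above. A host-side C++ reference of that identity, useful for checking the kernel's arithmetic (illustration only, not part of the commit):

#include <algorithm>
#include <cmath>
#include <vector>

std::vector<double> log_softmax_ref(const std::vector<double>& x)
{
    double m = *std::max_element(x.begin(), x.end()); // subtract the max for stability
    double s = 0;
    for(double v : x)
        s += std::exp(v - m);
    double log_sum = std::log(s) + m; // matches log_batch_sum in the kernel
    std::vector<double> out;
    for(double v : x)
        out.push_back(v - log_sum);
    return out;
}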
src/targets/gpu/device/pad.cpp

@@ -15,33 +15,26 @@ argument
 pad(hipStream_t stream,
     argument result,
     argument arg1,
     float value,
     std::vector<std::int64_t> pads)
 {
     std::size_t nelements = arg1.get_shape().elements();
-    visit_all(result)([&](auto output) {
-        auto* outptr = device_cast(output.data());
-        using type   = typename decltype(output)::value_type;
-        device_type<type> device_val = value;
+    hip_visit_all(result, arg1)([&](auto output, auto input) {
+        using type      = typename decltype(output)::value_type;
+        using hip_index = typename decltype(output)::hip_index;
+        type device_val = value;
         if(float_equal(value, std::numeric_limits<float>::lowest()))
         {
             device_val = device_cast(std::numeric_limits<type>::lowest());
         }
-        gs_launch(stream, result.get_shape().elements())([=](auto i) { outptr[i] = device_val; });
-    });
-    visit_all(result, arg1)([&](auto output, auto input) {
-        visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) {
-            std::size_t offsets[ndim];
-            std::copy(pads.begin(), pads.begin() + ndim, offsets);
-            auto* outptr      = output.data();
-            const auto* inptr = input.data();
-            hip_tensor_descriptor<ndim> desc_input(input.get_shape());
-            hip_tensor_descriptor<ndim> desc_output(output.get_shape());
-            gs_launch(stream, nelements)([=](auto i) {
-                auto idx = desc_input.multi(i);
-                for(std::size_t j = 0; j < ndim; j++)
-                {
-                    idx[j] += offsets[j];
-                }
-                outptr[desc_output.linear(idx)] = inptr[i];
-            });
-        });
+        gs_launch(stream, result.get_shape().elements())(
+            [=](auto i) { output.data()[i] = device_val; });
+        hip_index offsets;
+        std::copy(pads.begin(), pads.begin() + offsets.size(), offsets.begin());
+        gs_launch(stream, nelements)([=](auto i) {
+            auto idx = input.get_shape().multi(i);
+            for(std::size_t j = 0; j < offsets.size(); j++)
+            {
+                idx[j] += offsets[j];
+            }
+            output[idx] = input.data()[i];
+        });
     });
     return result;
...
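Note: the kernel runs in two phases, a fill of the whole output with the pad value and a scatter of each input element to its index shifted by the leading pads (the kernel copies the first rank-many entries of pads into offsets). The same semantics in one dimension, as a host-side sketch (illustration only, not part of the commit):

#include <cstdint>
#include <vector>

std::vector<float> pad1d_ref(const std::vector<float>& in, int64_t before, int64_t after, float value)
{
    std::vector<float> out(in.size() + before + after, value); // fill phase
    for(std::size_t i = 0; i < in.size(); ++i)
        out[i + before] = in[i]; // scatter phase: idx[j] += offsets[j]
    return out;
}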
src/targets/gpu/device/reduce_sum.cpp (new file, mode 100644)

#include <migraphx/gpu/device/reduce_sum.hpp>
#include <migraphx/gpu/device/reduce.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void reduce_sum(hipStream_t stream, const argument& result, const argument& arg)
{
    reduce(stream, result, arg, sum{}, 0, id{}, id{});
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
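Note: reduce_sum is a thin wrapper over the generic reduce kernel from reduce.hpp: sum{} is the combining op, 0 its identity, and the two id{} arguments appear to be element-wise input and output transforms (the parameter roles are an assumption; this diff does not show reduce's definition). A host-side sketch of that calling shape:

// Illustration only, assuming reduce's last two arguments are pre/post transforms.
#include <vector>

template <class Op, class T, class In, class Out>
T reduce_ref(const std::vector<T>& xs, Op op, T init, In fin, Out fout)
{
    T acc = init;
    for(const auto& x : xs)
        acc = op(acc, fin(x)); // fold each transformed element with the op
    return fout(acc);          // post-transform the accumulated value
}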
src/targets/gpu/device/softmax.cpp

@@ -2,6 +2,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/dfor.hpp>
 #include <migraphx/gpu/device/softmax.hpp>
+#include <migraphx/gpu/device/reduce.hpp>
 #include <migraphx/gpu/device/tensor.hpp>
 #include <migraphx/gpu/device/launch.hpp>
 #include <migraphx/gpu/device/types.hpp>
...
@@ -12,69 +13,44 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument softmax(hipStream_t stream,
-                 const migraphx::shape& output_shape,
-                 std::vector<migraphx::argument> args,
-                 int axis)
-{
-    auto lens        = output_shape.lens();
-    auto batch_lens  = lens;
-    size_t n_dims    = lens[axis];
-    batch_lens[axis] = 1;
-    migraphx::shape batch_shape{shape::int32_type, batch_lens};
-
-    visit_all(args.back(), args.front())([&](auto output, auto input) {
-        const auto* input_ptr = device_cast(input.data());
-        auto* output_ptr      = device_cast(output.data());
-        visit_tensor_size(batch_shape.lens().size(), [&](auto n_dim) {
-            hip_tensor_descriptor<n_dim> desc_batch(batch_shape);
-            hip_tensor_descriptor<n_dim> desc_data(output_shape);
-            // each thread is for one item in the batch
-            gs_launch(stream, batch_shape.elements())([=](auto i) {
-                auto batch_idx = desc_batch.multi(i);
-                auto data_idx  = batch_idx;
-                // get max
-                auto batch_max = input_ptr[desc_data.linear(batch_idx)];
-                for(std::size_t j = 1; j < n_dims; ++j)
-                {
-                    data_idx[axis] = j;
-                    batch_max      = std::max(to_hip_type(batch_max),
-                                              to_hip_type(input_ptr[desc_data.linear(data_idx)]));
-                }
-                for(std::size_t j = 0; j < n_dims; ++j)
-                {
-                    data_idx[axis]  = j;
-                    auto idx        = desc_data.linear(data_idx);
-                    output_ptr[idx] = input_ptr[idx] - batch_max;
-                }
-                for(std::size_t j = 0; j < n_dims; ++j)
-                {
-                    data_idx[axis]  = j;
-                    auto idx        = desc_data.linear(data_idx);
-                    output_ptr[idx] = exp(to_hip_type(output_ptr[idx]));
-                }
-                auto batch_sum = output_ptr[desc_data.linear(batch_idx)];
-                for(std::size_t j = 1; j < n_dims; ++j)
-                {
-                    data_idx[axis] = j;
-                    batch_sum += output_ptr[desc_data.linear(data_idx)];
-                }
-                for(std::size_t j = 0; j < n_dims; ++j)
-                {
-                    data_idx[axis]  = j;
-                    auto idx        = desc_data.linear(data_idx);
-                    output_ptr[idx] = output_ptr[idx] / batch_sum;
-                }
-            });
-        });
-    });
-    return args.back();
-}
+void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
+{
+    auto lens                  = result.get_shape().lens();
+    auto batch_lens            = lens;
+    std::size_t batch_item_num = lens[axis];
+    batch_lens[axis]           = 1;
+    migraphx::shape batch_shape{result.get_shape().type(), batch_lens};
+
+    hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
+        const std::size_t max_block_size = 256;
+        const std::size_t block_size     = compute_block_size(batch_item_num, max_block_size);
+        gs_launch(stream, batch_shape.elements() * block_size, block_size)(
+            [=](auto i, auto idx) __device__ {
+                auto data_idx = batch.multi(i / block_size);
+                using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
+                type init  = lowest();
+
+                auto batch_max = block_reduce<max_block_size>(
+                    idx, max{}, init, batch_item_num, [&](auto j) __device__ {
+                        data_idx[axis] = j;
+                        return input[data_idx];
+                    });
+
+                auto batch_sum = block_reduce<max_block_size>(
+                    idx, sum{}, 0, batch_item_num, [&](auto j) __device__ {
+                        data_idx[axis] = j;
+                        auto val       = input[data_idx] - batch_max;
+                        return ::exp(to_hip_type(val));
+                    });
+
+                idx.local_stride(batch_item_num, [&](auto j) {
+                    data_idx[axis]   = j;
+                    auto val         = input[data_idx] - batch_max;
+                    output[data_idx] = ::exp(to_hip_type(val)) / batch_sum;
+                });
+            });
+    });
+}

 } // namespace device
...
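Note: as with logsoftmax above, one block of block_size work items now handles one batch slice: gs_launch is given batch_shape.elements() * block_size items, block_reduce computes the max and the sum of shifted exponentials, and local_stride writes the normalized values. A host-side reference of the arithmetic (illustration only, not part of the commit):

#include <algorithm>
#include <cmath>
#include <vector>

std::vector<double> softmax_ref(const std::vector<double>& x)
{
    double m = *std::max_element(x.begin(), x.end()); // block_reduce with max{}
    double s = 0;
    for(double v : x)
        s += std::exp(v - m);                         // block_reduce with sum{}
    std::vector<double> out;
    for(double v : x)
        out.push_back(std::exp(v - m) / s);           // final local_stride pass
    return out;
}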
src/targets/gpu/fuse_ops.cpp

@@ -5,6 +5,7 @@
 #include <migraphx/gpu/device/add_relu.hpp>
 #include <migraphx/gpu/device/add.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/array.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
...
@@ -122,13 +123,6 @@ MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
            s.strides()[1] != 0 and s.strides()[2] == 0 and s.strides()[3] == 0;
 }

-// TODO: Move to another header
-template <class T, class... Ts>
-std::array<T, sizeof...(Ts) + 1> make_array(T x, Ts... xs)
-{
-    return {std::move(x), std::move(static_cast<T>(xs))...};
-}
-
 MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
 {
     if(ins->name() != "gpu::convolution")
...
@@ -206,12 +200,33 @@ struct hip_add_relu
     }
 };

+void move_broadcasted_back(std::vector<instruction_ref>& args)
+{
+    // Ensure the last arguments is the broadcasted one
+    auto it = std::find_if(
+        args.begin(), args.end(), [](auto arg) { return arg->get_shape().broadcasted(); });
+    if(it != args.end())
+        std::swap(*it, *std::prev(args.end(), 2));
+}
+
+void move_standard_front(std::vector<instruction_ref>& args)
+{
+    // Ensure the first arguments is the standard one
+    auto it = std::find_if(
+        args.begin(), args.end(), [](auto arg) { return arg->get_shape().standard(); });
+    if(it != args.end())
+        std::swap(*it, args.front());
+}
+
 struct find_add_relu
 {
     auto matcher() const
     {
-        return match::name("gpu::relu")(match::arg(0)(
-            match::any_of(match::name("gpu::add"), match::name("hip::triadd")).bind("add")));
+        return match::name("gpu::relu")(
+            match::arg(0)(match::any_of(match::name("gpu::add"),
+                                        match::name("hip::triadd"),
+                                        match::any_of[match::inputs()](match::standard_shape()))
+                              .bind("add")));
     }

     void apply(program& p, match::matcher_result r) const
...
@@ -219,6 +234,9 @@ struct find_add_relu
         auto add_ins = r.instructions["add"];
         auto ins     = r.result;
         auto args    = add_ins->inputs();
+        move_standard_front(args);
+        move_broadcasted_back(args);
+
         // Use the allocation from the relu operator
         args.back() = ins->inputs().back();
         if(add_ins->name() == "gpu::add")
...
@@ -232,24 +250,26 @@ struct find_triadd
 {
     auto matcher() const
     {
-        return match::name("gpu::add")(match::either_arg(0, 1)(
-            match::name("gpu::add").bind("add"), match::any().bind("input")));
+        return match::name("gpu::add")(match::either_arg(0, 1)(
+            match::name("gpu::add").bind("add"),
+            match::any(match::any_of[match::inputs()](match::standard_shape())).bind("input")));
     }

     void apply(program& p, match::matcher_result r) const
     {
         auto add_ins   = r.instructions["add"];
         auto input_ins = r.instructions["input"];
         auto ins       = r.result;
         auto args      = add_ins->inputs();
+        assert(add_ins != input_ins);
         auto is_broadcasted = [](auto arg) { return arg->get_shape().broadcasted(); };
         if(std::count_if(args.begin(), args.end(), is_broadcasted) > 1)
             return;
         args.insert(args.begin(), input_ins);
-        // Ensure the last arguments is the broadcasted one
-        auto it = std::find_if(args.begin(), args.end(), is_broadcasted);
-        if(it != args.end())
-            std::swap(*it, *std::prev(args.end(), 2));
+        move_standard_front(args);
+        move_broadcasted_back(args);
         args.back() = ins->inputs().back();
         p.replace_instruction(ins, hip_triadd{}, args);
     }
...
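Note: the new move_standard_front / move_broadcasted_back helpers canonicalize argument order before fusion; the last slot is reserved for the output allocation, which is why the broadcasted argument is swapped to end() − 2. The same reordering pattern on a plain vector of tags instead of instruction_ref (illustration only):

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> args = {"broadcasted", "standard", "alloc"};
    auto it = std::find_if(args.begin(), args.end(),
                           [](const auto& a) { return a == "broadcasted"; });
    if(it != args.end())
        std::swap(*it, *std::prev(args.end(), 2)); // move just before the allocation
    assert(args[1] == "broadcasted" && args.back() == "alloc");
}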
src/targets/gpu/gather.cpp

@@ -12,11 +12,9 @@ shape hip_gather::compute_shape(std::vector<shape> inputs) const
     return op.compute_shape(inputs);
 }

-argument hip_gather::compute(context& ctx,
-                             const shape& output_shape,
-                             const std::vector<argument>& args) const
+argument
+hip_gather::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    return device::gather(ctx.get_stream().get(), output_shape, args, op.axis);
+    return device::gather(ctx.get_stream().get(), args.back(), args[0], args[1], op.axis);
 }

 } // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/argmax.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_ARGMAX_HPP
#define MIGRAPHX_GUARD_RTGLIB_ARGMAX_HPP

#include <migraphx/shape.hpp>
#include <migraphx/op/argmax.hpp>
#include <migraphx/gpu/device/argmax.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_argmax
{
    op::argmax op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::argmax"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/argmin.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_ARGMIN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ARGMIN_HPP

#include <migraphx/shape.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/gpu/device/argmin.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_argmin
{
    op::argmin op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::argmin"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARG_OP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARG_OP_HPP

#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/reduce.hpp>
#include <migraphx/gpu/hip.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

template <class T>
struct val_index
{
    T val;
    int64_t index;
};

template <class T>
MIGRAPHX_DEVICE_CONSTEXPR val_index<T> make_val_index(T v)
{
    return {v, -1};
}

template <class T>
MIGRAPHX_DEVICE_CONSTEXPR val_index<T> make_val_index(T v, int64_t i)
{
    return {v, i};
}

struct argmax_op
{
    template <class T>
    MIGRAPHX_DEVICE_CONSTEXPR val_index<T> operator()(val_index<T> x, val_index<T> y) const
    {
        if(x.val > y.val)
            return x;
        else if(x.val < y.val)
            return y;
        else
        {
            return (x.index < y.index) ? x : y;
        }
    }

    MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return lowest(); }
};

struct argmin_op
{
    template <class T>
    MIGRAPHX_DEVICE_CONSTEXPR val_index<T> operator()(val_index<T> x, val_index<T> y) const
    {
        if(x.val < y.val)
            return x;
        else if(x.val > y.val)
            return y;
        else
        {
            return (x.index < y.index) ? x : y;
        }
    }

    MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return highest(); }
};

template <class Op>
void arg_op(Op op, hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
{
    auto arg_shape        = arg.get_shape();
    auto lens             = arg_shape.lens();
    auto batch_lens       = lens;
    size_t batch_item_num = lens[axis];
    batch_lens[axis]      = 1;
    migraphx::shape batch_shape{arg_shape.type(), batch_lens};

    hip_visit_all(arg, arg_shape, batch_shape)([&](auto input, auto arg_s, auto batch_s) {
        auto output = device_cast(result.get<int64_t>().data());
        using type  = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
        // use one block for items in one batch.
        const size_t max_block_size  = 256;
        const std::size_t block_size = compute_block_size(batch_item_num, max_block_size);
        gs_launch(stream, batch_shape.elements() * block_size, block_size)(
            [=](auto i, auto idx) __device__ {
                auto batch_idx = batch_s.multi(i / block_size);
                auto data_idx  = batch_idx;
                auto init      = make_val_index<type>(op.init());

                auto op_output = block_reduce<max_block_size>(
                    idx, op, init, batch_item_num, [&](auto j) __device__ {
                        data_idx[axis] = j;
                        return make_val_index(input[arg_s.index(data_idx)], j);
                    });

                if(idx.local == 0)
                {
                    output[batch_s.index(batch_idx)] = op_output.index;
                }
            });
    });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
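Note: argmax_op and argmin_op reduce (value, index) pairs, and on equal values they keep the smaller index, so ties resolve to the first occurrence along the axis. That tie-breaking rule, checked on the host (illustration only, mirroring argmax_op::operator() above):

#include <cassert>
#include <cstdint>

struct vi { double val; int64_t index; };

vi combine_max(vi x, vi y) // mirrors argmax_op::operator()
{
    if(x.val > y.val) return x;
    if(x.val < y.val) return y;
    return (x.index < y.index) ? x : y;
}

int main()
{
    assert(combine_max(vi{3.0, 1}, vi{3.0, 4}).index == 1); // tie: earlier index wins
    assert(combine_max(vi{1.0, 0}, vi{3.0, 4}).index == 4); // otherwise the larger value wins
}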
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP

#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void argmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/argmin.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMIN_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMIN_HPP

#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void argmin(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/erf.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ERF_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ERF_HPP

#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void erf(hipStream_t stream, const argument& result, const argument& arg);

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/gather.hpp

@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument gather(hipStream_t stream,
-                const migraphx::shape& output_shape,
-                std::vector<migraphx::argument> args,
-                int axis);
+argument gather(hipStream_t stream, argument result, argument arg1, argument arg2, int axis);

 } // namespace device
 } // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp

@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument logsoftmax(hipStream_t stream,
-                    const migraphx::shape& output_shape,
-                    std::vector<migraphx::argument> args,
-                    int axis);
+void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int axis);

 } // namespace device
 } // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/device/reduce_sum.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_SUM_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_SUM_HPP

#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void reduce_sum(hipStream_t stream, const argument& result, const argument& arg);

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/device/softmax.hpp

@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument softmax(hipStream_t stream,
-                 const migraphx::shape& output_shape,
-                 std::vector<migraphx::argument> args,
-                 int axis);
+void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis);

 } // namespace device
 } // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/erf.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_ERF_HPP
#define MIGRAPHX_GUARD_RTGLIB_ERF_HPP

#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/erf.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct hip_erf : unary_device<hip_erf, device::erf>
{
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/reduce_sum.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP

#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_sum.hpp>
#include <migraphx/reflect.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_reduce_sum
{
    op::reduce_sum op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::reduce_sum"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/logsoftmax.cpp

@@ -15,11 +15,11 @@ shape hip_logsoftmax::compute_shape(const std::vector<shape>& inputs) const
     return op.compute_shape({inputs.at(0)});
 }

-argument hip_logsoftmax::compute(context& ctx,
-                                 const shape& output_shape,
-                                 const std::vector<argument>& args) const
+argument
+hip_logsoftmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    return device::logsoftmax(ctx.get_stream().get(), output_shape, args, op.axis);
+    device::logsoftmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    return args.back();
 }

 } // namespace gpu
...