gaoqiong / MIGraphX · Commits

Commit eb0d8fee
Authored Jun 04, 2019 by Paul

    Merge branch 'develop' into driver

Parents: 65ef35cd 0d796941

Changes: 320. Showing 20 changed files with 515 additions and 201 deletions (+515 / -201).
src/targets/gpu/device/convert.cpp                           +24   -0
src/targets/gpu/device/gather.cpp                            +15   -11
src/targets/gpu/device/logsoftmax.cpp                        +70   -0
src/targets/gpu/eliminate_workspace.cpp                      +0    -1
src/targets/gpu/elu.cpp                                      +1    -4
src/targets/gpu/fuse_ops.cpp                                 +35   -5
src/targets/gpu/gather.cpp                                   +2    -5
src/targets/gpu/gemm.cpp                                     +190  -42
src/targets/gpu/hip.cpp                                      +2    -0
src/targets/gpu/include/migraphx/gpu/abs.hpp                 +14   -16
src/targets/gpu/include/migraphx/gpu/acos.hpp                +0    -15
src/targets/gpu/include/migraphx/gpu/add.hpp                 +0    -15
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp   +23   -0
src/targets/gpu/include/migraphx/gpu/asin.hpp                +0    -15
src/targets/gpu/include/migraphx/gpu/atan.hpp                +0    -15
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp           +15   -17
src/targets/gpu/include/migraphx/gpu/clip.hpp                +37   -0
src/targets/gpu/include/migraphx/gpu/concat.hpp              +14   -17
src/targets/gpu/include/migraphx/gpu/context.hpp             +58   -6
src/targets/gpu/include/migraphx/gpu/contiguous.hpp          +15   -17
src/targets/gpu/device/convert.cpp  (new file, 0 → 100644, +24 -0)

#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void convert(hipStream_t stream, const argument& result, const argument& arg)
{
    result.visit([&](auto output) {
        arg.visit([&](auto input) {
            const auto* input_ptr = device_cast(input.data());
            auto* output_ptr      = device_cast(output.data());
            gs_launch(stream, result.get_shape().elements())(
                [=](auto i) { output_ptr[i] = input_ptr[i]; });
        });
    });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
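Note: the new device::convert above is an element-wise copy between two buffers of different element types; the cast happens implicitly in the assignment inside the lambda, since output and input are each visited with their own concrete type. A minimal CPU sketch of the same idea, with hypothetical names that are not part of this commit:

#include <cstddef>
#include <vector>

// Sketch only: the implicit In -> Out conversion in the assignment mirrors
// output_ptr[i] = input_ptr[i] in the kernel above.
template <class Out, class In>
std::vector<Out> convert_elements(const std::vector<In>& input)
{
    std::vector<Out> output(input.size());
    for(std::size_t i = 0; i < input.size(); ++i)
        output[i] = input[i];
    return output;
}

// Usage: auto floats = convert_elements<float>(std::vector<double>{1.5, 2.25});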
src/targets/gpu/device/gather.cpp  (+15 -11)

...
@@ -16,20 +16,24 @@ argument gather(hipStream_t stream,
                std::vector<migraphx::argument> args,
                int axis)
{
-   int axis_index = (axis < 0) ? (axis + output_shape.lens().size()) : axis;
+   auto axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;

    visit_all(args.back(), args[0])([&](auto output, auto input) {
        std::size_t nelements = output_shape.elements();
        args[1].visit([&](auto indices) {
-           visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
-               const auto* indices_ptr = device_cast(indices.data());
-               auto* outptr            = device_cast(output.data());
-               const auto* inptr       = device_cast(input.data());
-               hip_tensor_descriptor<ndim> desc_input(input.get_shape());
-               hip_tensor_descriptor<ndim> desc_output(output.get_shape());
-               gs_launch(stream, nelements)([=](auto i) {
-                   auto lens        = desc_output.multi(i);
-                   lens[axis_index] = indices_ptr[lens[axis_index]];
-                   outptr[i]        = inptr[desc_input.linear(lens)];
+           const auto* indices_ptr = device_cast(indices.data());
+           auto* out_ptr           = device_cast(output.data());
+           const auto* in_ptr      = device_cast(input.data());
+           auto& input_shape       = args[0].get_shape();
+           auto lens               = input_shape.lens();
+           lens[axis_index]        = args[1].get_shape().elements();
+           migraphx::shape out_comp_shape{output_shape.type(), lens};
+           visit_tensor_size(out_comp_shape.lens().size(), [&](auto n_out_dim) {
+               hip_tensor_descriptor<n_out_dim> desc_input(input_shape);
+               hip_tensor_descriptor<n_out_dim> desc_output(out_comp_shape);
+               gs_launch(stream, nelements)([=](auto ii) {
+                   auto in_idx        = desc_output.multi(ii);
+                   in_idx[axis_index] = indices_ptr[in_idx[axis_index]];
+                   out_ptr[ii]        = in_ptr[desc_input.linear(in_idx)];
                });
            });
        });
...
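Net effect of this change: instead of indexing through the output's own shape, the kernel builds a computation shape that is the input shape with the gather axis length replaced by the number of indices, then maps each output coordinate back into the input by looking up its axis coordinate in the index tensor. A CPU sketch of that indexing for the simplest case (gathering rows along axis 0 of a 2-D, row-major tensor; hypothetical helper, not the MIGraphX API):

#include <cassert>
#include <cstddef>
#include <vector>

std::vector<float> gather_rows(const std::vector<float>& input, // rows x cols, row-major
                               std::size_t rows,
                               std::size_t cols,
                               const std::vector<std::size_t>& indices)
{
    // Output shape is the input shape with the axis-0 length replaced by indices.size().
    std::vector<float> output(indices.size() * cols);
    for(std::size_t i = 0; i < indices.size(); ++i)
    {
        assert(indices[i] < rows);
        for(std::size_t j = 0; j < cols; ++j)
            output[i * cols + j] = input[indices[i] * cols + j];
    }
    return output;
}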
src/targets/gpu/device/logsoftmax.cpp  (new file, 0 → 100644, +70 -0)

#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/logsoftmax.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

argument logsoftmax(hipStream_t stream,
                    const migraphx::shape& output_shape,
                    std::vector<migraphx::argument> args,
                    int axis)
{
    auto lens = output_shape.lens();
    std::size_t batch_size = std::accumulate(
        lens.begin(), lens.begin() + axis, std::size_t{1}, std::multiplies<std::size_t>());
    std::size_t n_dims = std::accumulate(
        lens.begin() + axis, lens.end(), std::size_t{1}, std::multiplies<std::size_t>());
    migraphx::shape comp_shape{output_shape.type(), {batch_size, n_dims}};

    visit_all(args.back(), args.front())([&](auto output, auto input) {
        const auto* input_ptr = device_cast(input.data());
        auto* output_ptr      = device_cast(output.data());
        // each thread is for one item in the batch
        gs_launch(stream, batch_size)([=](auto i) {
            std::size_t row_start = i * n_dims;
            // get max
            auto batch_max = input_ptr[row_start];
            for(std::size_t j = 1; j < n_dims; ++j)
            {
                auto ind  = row_start + j;
                batch_max = std::max(to_hip_type(batch_max), to_hip_type(input_ptr[ind]));
            }

            for(std::size_t j = 0; j < n_dims; ++j)
            {
                auto ind        = row_start + j;
                output_ptr[ind] = input_ptr[ind] - batch_max;
            }

            auto batch_sum = ::exp(to_hip_type(output_ptr[row_start]));
            for(std::size_t j = 1; j < n_dims; ++j)
            {
                auto ind = row_start + j;
                batch_sum += ::exp(to_hip_type(output_ptr[ind]));
            }
            batch_sum = ::log(to_hip_type(batch_sum));

            for(std::size_t j = 0; j < n_dims; ++j)
            {
                auto ind = row_start + j;
                output_ptr[ind] -= batch_sum;
            }
        });
    });

    return args.back();
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
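The kernel assigns one thread per batch row and computes the numerically stable form logsoftmax(x_j) = x_j - max(x) - log(sum_k exp(x_k - max(x))): subtract the row max, accumulate the log of the sum of exponentials, then subtract that log from every shifted entry. A CPU reference for a single row, sketched with standard-library calls only (not MIGraphX code):

#include <algorithm>
#include <cmath>
#include <vector>

std::vector<double> logsoftmax_row(std::vector<double> x)
{
    const double m = *std::max_element(x.begin(), x.end());
    double sum     = 0.0;
    for(auto& v : x)
    {
        v -= m;             // pass 1: shift by the row max for numerical stability
        sum += std::exp(v); // pass 2: accumulate exp of the shifted values
    }
    const double log_sum = std::log(sum);
    for(auto& v : x)
        v -= log_sum;       // pass 3: subtract log of the sum
    return x;
}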
src/targets/gpu/eliminate_workspace.cpp  (+0 -1)

...
@@ -2,7 +2,6 @@
#include <migraphx/gpu/hip.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
...
src/targets/gpu/elu.cpp  (+1 -4)

#include <migraphx/gpu/elu.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
src/targets/gpu/fuse_ops.cpp  (+35 -5)

...
@@ -3,6 +3,7 @@
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
+#include <migraphx/gpu/device/add.hpp>
#include <migraphx/instruction.hpp>

namespace migraphx {
...
@@ -139,6 +140,8 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
    auto conv = any_cast<miopen_convolution>(ins->get_operator());
    if(conv.op.group > 1)
        return false;
    if(conv.op.padding_mode != op::padding_mode_t::default_)
        return false;
    if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd)
        return false;
    auto op = conv.op;
...
@@ -159,7 +162,10 @@ struct hip_triadd
        device::add(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
        return args.at(3);
    }
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

struct hip_triadd_relu
...
@@ -175,7 +181,10 @@ struct hip_triadd_relu
        device::add_relu(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
        return args.at(3);
    }
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

struct hip_add_relu
...
@@ -191,7 +200,10 @@ struct hip_add_relu
        device::add_relu(ctx.get_stream().get(), args.at(2), args.at(0), args.at(1));
        return args.at(2);
    }
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

struct find_add_relu
...
@@ -250,6 +262,12 @@ struct miopen_conv_bias
    fusion::op_t conv;
    fusion::op_t bias;

+   template <class Self, class F>
+   static auto reflect(Self& self, F f)
+   {
+       return op::convolution::reflect(self.op, f);
+   }
+
    miopen_conv_bias(op::convolution c, const shape& input, const shape& weights, const shape& b)
        : op(c), f(input)
    {
...
@@ -276,7 +294,10 @@ struct miopen_conv_bias
    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

struct miopen_conv_bias_relu
...
@@ -287,6 +308,12 @@ struct miopen_conv_bias_relu
    fusion::op_t bias;
    fusion::op_t relu;

+   template <class Self, class F>
+   static auto reflect(Self& self, F f)
+   {
+       return op::convolution::reflect(self.op, f);
+   }
+
    miopen_conv_bias_relu(op::convolution c, const shape& input, const shape& weights,
...
@@ -317,7 +344,10 @@ struct miopen_conv_bias_relu
    }
    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

template <class... Ms>
...
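Aside from retyping output_alias to std::ptrdiff_t and adding reflect to the fused conv ops, the fused element-wise ops here write into their extra output argument (args.at(3) or args.at(2)) in a single launch. What hip_triadd fuses, sketched on the CPU for the equal-shape case only (illustration, not MIGraphX code):

#include <cstddef>
#include <vector>

void triadd(std::vector<float>& out,
            const std::vector<float>& x,
            const std::vector<float>& y,
            const std::vector<float>& z)
{
    // One pass instead of two separate element-wise adds.
    for(std::size_t i = 0; i < out.size(); ++i)
        out[i] = x[i] + y[i] + z[i];
}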
src/targets/gpu/gather.cpp  (+2 -5)

#include <migraphx/gpu/gather.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <utility>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/gather.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
src/targets/gpu/gemm.cpp  (+190 -42)

#include <migraphx/gpu/gemm.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/add.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

template <class... Ts>
-void generic_rocblas_gemm(shape::as<float>, Ts&&... xs)
+rocblas_status generic_rocblas_scal(shape::as<float>, Ts&&... xs)
{
-   rocblas_sgemm(std::forward<Ts>(xs)...);
+   return rocblas_sscal(std::forward<Ts>(xs)...);
}

template <class... Ts>
-void generic_rocblas_gemm(shape::as<double>, Ts&&... xs)
+rocblas_status generic_rocblas_scal(shape::as<double>, Ts&&... xs)
{
-   rocblas_dgemm(std::forward<Ts>(xs)...);
+   return rocblas_dscal(std::forward<Ts>(xs)...);
}

+template <class T, class... Ts>
+rocblas_status generic_rocblas_scal(shape::as<T>, Ts&&...)
+{
+   MIGRAPHX_THROW("GENERIC_ROCBLAS_SCAL: type unsupported by rocblas");
+}

+template <class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<half>, Ts&&... xs)
+{
+   return rocblas_haxpy(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<float>, Ts&&... xs)
+{
+   return rocblas_saxpy(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<double>, Ts&&... xs)
+{
+   return rocblas_daxpy(std::forward<Ts>(xs)...);
+}

+template <class T, class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<T>, Ts&&...)
+{
+   MIGRAPHX_THROW("GENERIC_ROCBLAS_AXPY: type unsupported by rocblas");
+}

+template <class... Ts>
+rocblas_status generic_rocblas_dot(shape::as<float>, Ts&&... xs)
+{
+   return rocblas_sdot(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_dot(shape::as<double>, Ts&&... xs)
+{
+   return rocblas_ddot(std::forward<Ts>(xs)...);
+}

+template <class T, class... Ts>
+rocblas_status generic_rocblas_dot(shape::as<T>, Ts&&...)
+{
+   MIGRAPHX_THROW("GENERIC_ROCBLAS_DOT: type unsupported by rocblas");
+}

+template <class... Ts>
+rocblas_status generic_rocblas_gemv(shape::as<float>, Ts&&... xs)
+{
+   return rocblas_sgemv(std::forward<Ts>(xs)...);
+}

template <class... Ts>
-void generic_rocblas_gemm(shape::as<half>, Ts&&... xs)
+rocblas_status generic_rocblas_gemv(shape::as<double>, Ts&&... xs)
{
-   rocblas_hgemm(std::forward<Ts>(xs)...);
+   return rocblas_dgemv(std::forward<Ts>(xs)...);
}

template <class T, class... Ts>
-void generic_rocblas_gemm(shape::as<T>, Ts&&...)
+rocblas_status generic_rocblas_gemv(shape::as<T>, Ts&&...)
{
-   MIGRAPHX_THROW("Type unsupported by rocblas");
+   MIGRAPHX_THROW("GENERIC_ROCBLAS_GEMMV: type unsupported by rocblas");
}

+template <class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<float>, Ts&&... xs)
+{
+   return rocblas_sgemm_strided_batched(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<double>, Ts&&... xs)
+{
+   return rocblas_dgemm_strided_batched(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<half>, Ts&&... xs)
+{
+   return rocblas_hgemm_strided_batched(std::forward<Ts>(xs)...);
+}

+template <class T, class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<T>, Ts&&...)
+{
+   MIGRAPHX_THROW("GENERIC_ROCBLAS_BATCHED_GEMM: type unsupported by rocblas");
+}

+template <class... Ts>
+rocblas_status generic_rocblas_gemm(shape::as<float>, Ts&&... xs)
+{
+   return rocblas_sgemm(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_gemm(shape::as<double>, Ts&&... xs)
+{
+   return rocblas_dgemm(std::forward<Ts>(xs)...);
+}

+template <class... Ts>
+rocblas_status generic_rocblas_gemm(shape::as<half>, Ts&&... xs)
+{
+   return rocblas_hgemm(std::forward<Ts>(xs)...);
+}

+template <class T, class... Ts>
+rocblas_status generic_rocblas_gemm(shape::as<T>, Ts&&...)
+{
+   MIGRAPHX_THROW("GENERIC_ROCBLAS_GEMM: type unsupported by rocblas");
+}

template <class T>
...
@@ -69,46 +169,94 @@ rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_hal
shape miopen_gemm::compute_shape(const std::vector<shape>& inputs) const
{
-   check_shapes{inputs, *this}.has(3);
-   return op.compute_shape({inputs.at(0), inputs.at(1)});
+   std::vector<shape> input_shapes(inputs.begin(), inputs.begin() + inputs.size() - 1);
+   check_shapes{input_shapes}.not_broadcasted();
+   return op.compute_shape(input_shapes);
}

argument miopen_gemm::compute(context& ctx,
                              const shape& output_shape,
                              const std::vector<argument>& args) const
{
-   float alpha     = 1.0f;
+   bool is_3inputs = (args.size() == 4);
    float beta      = 0.0f;
-   bool transa     = args[0].get_shape().transposed();
-   bool transb     = args[1].get_shape().transposed();
-   rocblas_int lda = args[0].get_shape().strides()[transa ? 1 : 0];
-   rocblas_int ldb = args[1].get_shape().strides()[transb ? 1 : 0];
-   rocblas_int ldc = args[2].get_shape().strides()[0];
-   rocblas_int m   = output_shape.lens()[0];
-   rocblas_int n   = output_shape.lens()[1];
-   rocblas_int k   = args[0].get_shape().lens()[1];
+   if(is_3inputs)
+   {
+       output_shape.visit_type([&](auto as) {
+           auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+           hipMemcpyAsync(to_pointer(args[3]),
+                          to_pointer(args[2]),
+                          output_shape.bytes(),
+                          hipMemcpyDeviceToDevice,
+                          ctx.get_stream().get());
+       });
+       beta = op.beta;
+   }

+   auto a_lens = args[0].get_shape().lens();
+   auto b_lens = args[1].get_shape().lens();
    output_shape.visit_type([&](auto as) {
-       auto alpha_r = to_rocblas_type(as(alpha));
-       auto beta_r  = to_rocblas_type(as(beta));
+       auto n_dim   = output_shape.lens().size();
+       auto dim_1   = n_dim - 1;
+       auto dim_0   = n_dim - 2;
+       auto alpha_r = to_rocblas_type(as(op.alpha));
+       auto beta_r  = to_rocblas_type(as(beta));
+       bool transa  = args[0].get_shape().transposed();
+       bool transb  = args[1].get_shape().transposed();
+       rocblas_int lda = args[0].get_shape().strides()[transa ? dim_1 : dim_0];
+       rocblas_int ldb = args[1].get_shape().strides()[transb ? dim_1 : dim_0];
+       rocblas_int ldc = args[2].get_shape().strides()[dim_0];
+       auto out_lens   = output_shape.lens();
+       rocblas_int m   = out_lens[dim_0];
+       rocblas_int n   = out_lens[dim_1];
+       rocblas_int k   = args[0].get_shape().lens()[dim_1];
+       auto num_matrices = std::accumulate(
+           out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
        auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
-       generic_rocblas_gemm(as,
-                            ctx.get_stream().get_rocblas(),
-                            transb ? rocblas_operation_transpose : rocblas_operation_none,
-                            transa ? rocblas_operation_transpose : rocblas_operation_none,
-                            n,
-                            m,
-                            k,
-                            &alpha_r,
-                            to_pointer(args[1]),
-                            ldb,
-                            to_pointer(args[0]),
-                            lda,
-                            &beta_r,
-                            to_pointer(args[2]),
-                            ldc);
+       if(num_matrices == 1)
+       {
+           generic_rocblas_gemm(as,
+                                ctx.get_stream().get_rocblas(),
+                                transb ? rocblas_operation_transpose : rocblas_operation_none,
+                                transa ? rocblas_operation_transpose : rocblas_operation_none,
+                                n,
+                                m,
+                                k,
+                                &alpha_r,
+                                to_pointer(args[1]),
+                                ldb,
+                                to_pointer(args[0]),
+                                lda,
+                                &beta_r,
+                                (is_3inputs ? to_pointer(args[3]) : to_pointer(args[2])),
+                                ldc);
+       }
+       else
+       {
+           generic_rocblas_batched_gemm(
+               as,
+               ctx.get_stream().get_rocblas(),
+               transb ? rocblas_operation_transpose : rocblas_operation_none,
+               transa ? rocblas_operation_transpose : rocblas_operation_none,
+               n,
+               m,
+               k,
+               &alpha_r,
+               to_pointer(args[1]),
+               ldb,
+               k * n,
+               to_pointer(args[0]),
+               lda,
+               m * k,
+               &beta_r,
+               (is_3inputs ? to_pointer(args[3]) : to_pointer(args[2])),
+               ldc,
+               m * n,
+               num_matrices);
+       }
    });

-   return args[2];
+   return (is_3inputs ? args[3] : args[2]);
}

} // namespace gpu
...
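Both GEMM paths pass the operands in swapped order with sizes (n, m, k) because rocBLAS is column-major while MIGraphX buffers are row-major: a row-major matrix reinterpreted as column-major data is its transpose, so computing B·A column-major yields A·B row-major. A stand-alone sketch of that equivalence for the contiguous, non-transposed case (illustration only, not MIGraphX code):

#include <cstddef>
#include <vector>

// Column-major reference gemm: C(n x m) = B(n x k) * A(k x m); ld = number of rows.
void gemm_col_major(std::size_t n, std::size_t m, std::size_t k,
                    const float* b, const float* a, float* c)
{
    for(std::size_t col = 0; col < m; ++col)
        for(std::size_t row = 0; row < n; ++row)
        {
            float acc = 0.0f;
            for(std::size_t p = 0; p < k; ++p)
                acc += b[row + p * n] * a[p + col * k];
            c[row + col * n] = acc;
        }
}

// Feeding the row-major buffers of B (k x n) and A (m x k) as the column-major
// operands above writes row-major C (m x n), matching the (n, m, k) argument order
// and swapped A/B pointers passed to generic_rocblas_gemm in the diff.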
src/targets/gpu/hip.cpp  (+2 -0)

...
@@ -43,6 +43,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
template <class T>
std::vector<T> read_from_gpu(const void* x, std::size_t sz)
{
+   gpu_sync();
    std::vector<T> result(sz);
    auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
    if(status != hipSuccess)
...
@@ -52,6 +53,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
{
+   gpu_sync();
    auto result = allocate_gpu(sz, host);
    auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
    if(status != hipSuccess)
...
src/targets/gpu/include/migraphx/gpu/abs.hpp  (+14 -16)

#ifndef MIGRAPHX_GUARD_RTGLIB_ABS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ABS_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct miopen_abs
{
    shared<activation_descriptor> ad;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return gpu::reflect(self.ad.get(), f);
    }

    std::string name() const { return "gpu::abs"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

} // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/acos.hpp  (+0 -15)

#ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
src/targets/gpu/include/migraphx/gpu/add.hpp  (+0 -15)

#ifndef MIGRAPHX_GUARD_RTGLIB_ADD_HPP
#define MIGRAPHX_GUARD_RTGLIB_ADD_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp  (new file, 0 → 100644, +23 -0)

#ifndef MIGRAPHX_GUARD_RTGLIB_ADJUST_ALLOCATION_HPP
#define MIGRAPHX_GUARD_RTGLIB_ADJUST_ALLOCATION_HPP

#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct adjust_allocation
{
    std::string name() const { return "gpu::adjust_allocation"; }
    void apply(program& p) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
src/targets/gpu/include/migraphx/gpu/asin.hpp  (+0 -15)

#ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
src/targets/gpu/include/migraphx/gpu/atan.hpp  (+0 -15)

#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp  (+15 -17)

#ifndef MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#define MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
#include <migraphx/shape.hpp>
#include <migraphx/op/batch_norm.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct miopen_batch_norm_inference
{
    op::batch_norm_inference op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::batch_norm_inference"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

} // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/clip.hpp  (new file, 0 → 100644, +37 -0)

#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP

#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_clip
{
    op::clip op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::clip"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
src/targets/gpu/include/migraphx/gpu/concat.hpp  (+14 -17)

#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
#include <migraphx/shape.hpp>
#include <migraphx/op/concat.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct hip_concat
{
    op::concat op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::concat"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

} // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/context.hpp  (+58 -6)

...
@@ -11,13 +11,19 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_NULL_STREAM)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NULL_STREAM)

+using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);

struct hip_device
{
    hip_device() { add_stream(); }

    hip_device(std::size_t id) : device_id(id) { add_stream(); }

+   hip_device(std::size_t id, std::size_t n) : device_id(id)
+   {
+       for(std::size_t i = 0; i < n; i++)
+           add_stream();
+   }

    struct stream
    {
...
@@ -32,7 +38,7 @@ struct hip_device
        static hip_stream_ptr create_stream()
        {
            hipStream_t result = nullptr;
-           auto status        = hipStreamCreate(&result);
+           auto status = hipStreamCreateWithFlags(&result, hipStreamNonBlocking);
            if(status != hipSuccess)
                MIGRAPHX_THROW("Failed to allocate stream");
            return hip_stream_ptr{result};
...
@@ -40,7 +46,7 @@ struct hip_device
        hipStream_t get()
        {
-           if(enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
+           if(not enabled(MIGRAPHX_ENABLE_NULL_STREAM{}))
            {
                setup();
                if(s == nullptr)
...
@@ -53,7 +59,7 @@ struct hip_device
        auto create_miopen_handle()
        {
-           if(enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
+           if(not enabled(MIGRAPHX_ENABLE_NULL_STREAM{}))
                return make_obj<miopen_handle>(&miopenCreateWithStream, get());
            else
                return make_obj<miopen_handle>(&miopenCreate);
...
@@ -77,6 +83,22 @@ struct hip_device
            return rbhandle.get();
        }

+       void wait(hipEvent_t event)
+       {
+           setup();
+           auto status = hipStreamWaitEvent(get(), event, 0);
+           if(status != hipSuccess)
+               MIGRAPHX_THROW("Failed to wait.");
+       }

+       void record(hipEvent_t event)
+       {
+           setup();
+           auto status = hipEventRecord(event, get());
+           if(status != hipSuccess)
+               MIGRAPHX_THROW("Failed to record.");
+       }

        private:
        std::size_t id           = 0;
        shared<hip_stream_ptr> s = nullptr;
...
@@ -88,8 +110,14 @@ struct hip_device
    stream& get_stream() { return streams.at(current_stream); }
+   stream& get_stream(std::size_t n) { return streams.at(n); }

+   void set_stream(std::size_t n) { current_stream = n; }

    std::size_t nstreams() const { return streams.size(); }
+   std::size_t stream_id() const { return current_stream; }

    private:
    std::size_t device_id      = 0;
    std::size_t current_stream = 0;
...
@@ -98,7 +126,10 @@ struct hip_device
struct context
{
-   context(std::size_t n = 0) : current_device(std::make_shared<hip_device>(n)) {}
+   context(std::size_t device_id = 0, std::size_t n = 4)
+       : current_device(std::make_shared<hip_device>(device_id, n))
+   {
+   }

    hip_device& get_current_device()
    {
...
@@ -107,13 +138,34 @@ struct context
    }

    hip_device::stream& get_stream() { return get_current_device().get_stream(); }
+   hip_device::stream& get_stream(std::size_t n) { return get_current_device().get_stream(n); }

+   void set_stream(std::size_t n) { get_current_device().set_stream(n); }

+   void create_events(std::size_t num_of_events)
+   {
+       for(std::size_t i = events.size(); i < num_of_events + 1; ++i)
+           events.emplace_back(create_event());
+   }

+   hipEvent_t get_event(std::size_t i) const { return events.at(i).get(); }

    std::vector<argument> literals{};
    void finish() const { gpu_sync(); }

+   static hip_event_ptr create_event()
+   {
+       hipEvent_t event;
+       auto status = hipEventCreateWithFlags(&event, hipEventDisableTiming);
+       if(status != hipSuccess)
+           MIGRAPHX_THROW("Failed to create event");
+       return hip_event_ptr{event};
+   }

    private:
    // TODO: Make this a vector to support multiple devices
    std::shared_ptr<hip_device> current_device;
+   std::vector<shared<hip_event_ptr>> events;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
...
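The additions above give the context multiple HIP streams plus a pool of events for ordering work between them: get_stream(n)/set_stream select a stream, create_events/get_event manage the pool, and stream::record/wait insert the cross-stream dependency. A hypothetical usage sketch built only from the functions shown in this diff (how the scheduler actually drives them is outside this page):

#include <migraphx/gpu/context.hpp>

void sync_example(migraphx::gpu::context& ctx)
{
    ctx.create_events(1);                       // ensure event 0 exists
    ctx.set_stream(0);                          // work is enqueued on stream 0 ...
    ctx.get_stream().record(ctx.get_event(0));  // ... then marked with an event
    ctx.get_stream(1).wait(ctx.get_event(0));   // stream 1 waits for stream 0's work
}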
src/targets/gpu/include/migraphx/gpu/contiguous.hpp  (+15 -17)

#ifndef MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP

#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
#include <migraphx/shape.hpp>
#include <migraphx/op/contiguous.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct context;

struct miopen_contiguous
{
    op::contiguous op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::contiguous"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument compute(context&, shape output_shape, const std::vector<argument>& args) const;
-   int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+   std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+   {
+       return shapes.size() - 1;
+   }
};

} // namespace gpu
...