Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
eb0d8fee
"doc/vscode:/vscode.git/clone" did not exist on "1c2c26989d12841ceaf5ef37c5fc994b6ad731c8"
Commit
eb0d8fee
authored
Jun 04, 2019
by
Paul
Browse files
Merge branch 'develop' into driver
parents
65ef35cd
0d796941
Changes
320
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
515 additions
and
201 deletions
+515
-201
src/targets/gpu/device/convert.cpp
src/targets/gpu/device/convert.cpp
+24
-0
src/targets/gpu/device/gather.cpp
src/targets/gpu/device/gather.cpp
+15
-11
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+70
-0
src/targets/gpu/eliminate_workspace.cpp
src/targets/gpu/eliminate_workspace.cpp
+0
-1
src/targets/gpu/elu.cpp
src/targets/gpu/elu.cpp
+1
-4
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+35
-5
src/targets/gpu/gather.cpp
src/targets/gpu/gather.cpp
+2
-5
src/targets/gpu/gemm.cpp
src/targets/gpu/gemm.cpp
+190
-42
src/targets/gpu/hip.cpp
src/targets/gpu/hip.cpp
+2
-0
src/targets/gpu/include/migraphx/gpu/abs.hpp
src/targets/gpu/include/migraphx/gpu/abs.hpp
+14
-16
src/targets/gpu/include/migraphx/gpu/acos.hpp
src/targets/gpu/include/migraphx/gpu/acos.hpp
+0
-15
src/targets/gpu/include/migraphx/gpu/add.hpp
src/targets/gpu/include/migraphx/gpu/add.hpp
+0
-15
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp
+23
-0
src/targets/gpu/include/migraphx/gpu/asin.hpp
src/targets/gpu/include/migraphx/gpu/asin.hpp
+0
-15
src/targets/gpu/include/migraphx/gpu/atan.hpp
src/targets/gpu/include/migraphx/gpu/atan.hpp
+0
-15
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
+15
-17
src/targets/gpu/include/migraphx/gpu/clip.hpp
src/targets/gpu/include/migraphx/gpu/clip.hpp
+37
-0
src/targets/gpu/include/migraphx/gpu/concat.hpp
src/targets/gpu/include/migraphx/gpu/concat.hpp
+14
-17
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+58
-6
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+15
-17
No files found.
src/targets/gpu/device/convert.cpp
0 → 100644
View file @
eb0d8fee
#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
convert
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
result
.
visit
([
&
](
auto
output
)
{
arg
.
visit
([
&
](
auto
input
)
{
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
gs_launch
(
stream
,
result
.
get_shape
().
elements
())([
=
](
auto
i
)
{
output_ptr
[
i
]
=
input_ptr
[
i
];
});
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/gather.cpp
View file @
eb0d8fee
...
@@ -16,20 +16,24 @@ argument gather(hipStream_t stream,
...
@@ -16,20 +16,24 @@ argument gather(hipStream_t stream,
std
::
vector
<
migraphx
::
argument
>
args
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
int
axis
)
{
{
int
axis_index
=
(
axis
<
0
)
?
(
axis
+
outpu
t_shape
.
lens
().
size
())
:
axis
;
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
ge
t_shape
()
.
lens
().
size
())
:
axis
;
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
output_shape
.
elements
();
std
::
size_t
nelements
=
output_shape
.
elements
();
args
[
1
].
visit
([
&
](
auto
indices
)
{
args
[
1
].
visit
([
&
](
auto
indices
)
{
visit_tensor_size
(
output_shape
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
outptr
=
device_cast
(
output
.
data
());
const
auto
*
in_ptr
=
device_cast
(
input
.
data
());
const
auto
*
inptr
=
device_cast
(
input
.
data
());
auto
&
input_shape
=
args
[
0
].
get_shape
();
hip_tensor_descriptor
<
ndim
>
desc_input
(
input
.
get_shape
());
auto
lens
=
input_shape
.
lens
();
hip_tensor_descriptor
<
ndim
>
desc_output
(
output
.
get_shape
());
lens
[
axis_index
]
=
args
[
1
].
get_shape
().
elements
();
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
migraphx
::
shape
out_comp_shape
{
output_shape
.
type
(),
lens
};
auto
lens
=
desc_output
.
multi
(
i
);
visit_tensor_size
(
out_comp_shape
.
lens
().
size
(),
[
&
](
auto
n_out_dim
)
{
lens
[
axis_index
]
=
indices_ptr
[
lens
[
axis_index
]];
hip_tensor_descriptor
<
n_out_dim
>
desc_input
(
input_shape
);
outptr
[
i
]
=
inptr
[
desc_input
.
linear
(
lens
)];
hip_tensor_descriptor
<
n_out_dim
>
desc_output
(
out_comp_shape
);
gs_launch
(
stream
,
nelements
)([
=
](
auto
ii
)
{
auto
in_idx
=
desc_output
.
multi
(
ii
);
in_idx
[
axis_index
]
=
indices_ptr
[
in_idx
[
axis_index
]];
out_ptr
[
ii
]
=
in_ptr
[
desc_input
.
linear
(
in_idx
)];
});
});
});
});
});
});
...
...
src/targets/gpu/device/logsoftmax.cpp
0 → 100644
View file @
eb0d8fee
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/logsoftmax.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
argument
logsoftmax
(
hipStream_t
stream
,
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
auto
lens
=
output_shape
.
lens
();
std
::
size_t
batch_size
=
std
::
accumulate
(
lens
.
begin
(),
lens
.
begin
()
+
axis
,
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
std
::
size_t
n_dims
=
std
::
accumulate
(
lens
.
begin
()
+
axis
,
lens
.
end
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
migraphx
::
shape
comp_shape
{
output_shape
.
type
(),
{
batch_size
,
n_dims
}};
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
// each thread is for one item in the batch
gs_launch
(
stream
,
batch_size
)([
=
](
auto
i
)
{
std
::
size_t
row_start
=
i
*
n_dims
;
// get max
auto
batch_max
=
input_ptr
[
row_start
];
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
batch_max
=
std
::
max
(
to_hip_type
(
batch_max
),
to_hip_type
(
input_ptr
[
ind
]));
}
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
output_ptr
[
ind
]
=
input_ptr
[
ind
]
-
batch_max
;
}
auto
batch_sum
=
::
exp
(
to_hip_type
(
output_ptr
[
row_start
]));
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
batch_sum
+=
::
exp
(
to_hip_type
(
output_ptr
[
ind
]));
}
batch_sum
=
::
log
(
to_hip_type
(
batch_sum
));
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
output_ptr
[
ind
]
-=
batch_sum
;
}
});
});
return
args
.
back
();
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/eliminate_workspace.cpp
View file @
eb0d8fee
...
@@ -2,7 +2,6 @@
...
@@ -2,7 +2,6 @@
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/program.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
...
...
src/targets/gpu/elu.cpp
View file @
eb0d8fee
#include <migraphx/gpu/elu.hpp>
#include <migraphx/gpu/elu.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/fuse_ops.cpp
View file @
eb0d8fee
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
namespace
migraphx
{
namespace
migraphx
{
...
@@ -139,6 +140,8 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
...
@@ -139,6 +140,8 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
auto
conv
=
any_cast
<
miopen_convolution
>
(
ins
->
get_operator
());
auto
conv
=
any_cast
<
miopen_convolution
>
(
ins
->
get_operator
());
if
(
conv
.
op
.
group
>
1
)
if
(
conv
.
op
.
group
>
1
)
return
false
;
return
false
;
if
(
conv
.
op
.
padding_mode
!=
op
::
padding_mode_t
::
default_
)
return
false
;
if
(
wei
.
lens
()[
1
]
>
512
and
conv
.
algo
!=
miopenConvolutionFwdAlgoWinograd
)
if
(
wei
.
lens
()[
1
]
>
512
and
conv
.
algo
!=
miopenConvolutionFwdAlgoWinograd
)
return
false
;
return
false
;
auto
op
=
conv
.
op
;
auto
op
=
conv
.
op
;
...
@@ -159,7 +162,10 @@ struct hip_triadd
...
@@ -159,7 +162,10 @@ struct hip_triadd
device
::
add
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
device
::
add
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
return
args
.
at
(
3
);
return
args
.
at
(
3
);
}
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
hip_triadd_relu
struct
hip_triadd_relu
...
@@ -175,7 +181,10 @@ struct hip_triadd_relu
...
@@ -175,7 +181,10 @@ struct hip_triadd_relu
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
return
args
.
at
(
3
);
return
args
.
at
(
3
);
}
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
hip_add_relu
struct
hip_add_relu
...
@@ -191,7 +200,10 @@ struct hip_add_relu
...
@@ -191,7 +200,10 @@ struct hip_add_relu
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
2
),
args
.
at
(
0
),
args
.
at
(
1
));
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
2
),
args
.
at
(
0
),
args
.
at
(
1
));
return
args
.
at
(
2
);
return
args
.
at
(
2
);
}
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
find_add_relu
struct
find_add_relu
...
@@ -250,6 +262,12 @@ struct miopen_conv_bias
...
@@ -250,6 +262,12 @@ struct miopen_conv_bias
fusion
::
op_t
conv
;
fusion
::
op_t
conv
;
fusion
::
op_t
bias
;
fusion
::
op_t
bias
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
op
::
convolution
::
reflect
(
self
.
op
,
f
);
}
miopen_conv_bias
(
op
::
convolution
c
,
const
shape
&
input
,
const
shape
&
weights
,
const
shape
&
b
)
miopen_conv_bias
(
op
::
convolution
c
,
const
shape
&
input
,
const
shape
&
weights
,
const
shape
&
b
)
:
op
(
c
),
f
(
input
)
:
op
(
c
),
f
(
input
)
{
{
...
@@ -276,7 +294,10 @@ struct miopen_conv_bias
...
@@ -276,7 +294,10 @@ struct miopen_conv_bias
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
miopen_conv_bias_relu
struct
miopen_conv_bias_relu
...
@@ -287,6 +308,12 @@ struct miopen_conv_bias_relu
...
@@ -287,6 +308,12 @@ struct miopen_conv_bias_relu
fusion
::
op_t
bias
;
fusion
::
op_t
bias
;
fusion
::
op_t
relu
;
fusion
::
op_t
relu
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
op
::
convolution
::
reflect
(
self
.
op
,
f
);
}
miopen_conv_bias_relu
(
op
::
convolution
c
,
miopen_conv_bias_relu
(
op
::
convolution
c
,
const
shape
&
input
,
const
shape
&
input
,
const
shape
&
weights
,
const
shape
&
weights
,
...
@@ -317,7 +344,10 @@ struct miopen_conv_bias_relu
...
@@ -317,7 +344,10 @@ struct miopen_conv_bias_relu
}
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
template
<
class
...
Ms
>
template
<
class
...
Ms
>
...
...
src/targets/gpu/gather.cpp
View file @
eb0d8fee
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/gather.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/gemm.cpp
View file @
eb0d8fee
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
template
<
class
...
Ts
>
template
<
class
...
Ts
>
void
generic_rocblas_
gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_
scal
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
{
rocblas_s
gemm
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_s
scal
(
std
::
forward
<
Ts
>
(
xs
)...);
}
}
template
<
class
...
Ts
>
template
<
class
...
Ts
>
void
generic_rocblas_
gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_
scal
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
{
rocblas_dgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_dscal
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_scal
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_SCAL: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
{
return
rocblas_haxpy
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_saxpy
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_daxpy
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_AXPY: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_dot
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sdot
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_dot
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_ddot
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_dot
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_DOT: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_gemv
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sgemv
(
std
::
forward
<
Ts
>
(
xs
)...);
}
}
template
<
class
...
Ts
>
template
<
class
...
Ts
>
void
generic_rocblas_gem
m
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_gem
v
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
{
rocblas_
h
gem
m
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_
d
gem
v
(
std
::
forward
<
Ts
>
(
xs
)...);
}
}
template
<
class
T
,
class
...
Ts
>
template
<
class
T
,
class
...
Ts
>
void
generic_rocblas_gem
m
(
shape
::
as
<
T
>
,
Ts
&&
...)
rocblas_status
generic_rocblas_gem
v
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
{
MIGRAPHX_THROW
(
"Type unsupported by rocblas"
);
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_GEMMV: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_dgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
{
return
rocblas_hgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_BATCHED_GEMM: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_dgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
{
return
rocblas_hgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_GEMM: type unsupported by rocblas"
);
}
}
template
<
class
T
>
template
<
class
T
>
...
@@ -69,46 +169,94 @@ rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_hal
...
@@ -69,46 +169,94 @@ rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_hal
shape
miopen_gemm
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
miopen_gemm
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
std
::
vector
<
shape
>
input_shapes
(
inputs
.
begin
(),
inputs
.
begin
()
+
inputs
.
size
()
-
1
);
return
op
.
compute_shape
({
inputs
.
at
(
0
),
inputs
.
at
(
1
)});
check_shapes
{
input_shapes
}.
not_broadcasted
();
return
op
.
compute_shape
(
input_shapes
);
}
}
argument
miopen_gemm
::
compute
(
context
&
ctx
,
argument
miopen_gemm
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
const
std
::
vector
<
argument
>&
args
)
const
{
{
float
alpha
=
1.0
f
;
bool
is_3inputs
=
(
args
.
size
()
==
4
)
;
float
beta
=
0.0
f
;
float
beta
=
0.0
f
;
bool
transa
=
args
[
0
].
get_shape
().
transposed
();
if
(
is_3inputs
)
bool
transb
=
args
[
1
].
get_shape
().
transposed
();
{
rocblas_int
lda
=
args
[
0
].
get_shape
().
strides
()[
transa
?
1
:
0
];
output_shape
.
visit_type
([
&
](
auto
as
)
{
rocblas_int
ldb
=
args
[
1
].
get_shape
().
strides
()[
transb
?
1
:
0
];
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
rocblas_int
ldc
=
args
[
2
].
get_shape
().
strides
()[
0
];
hipMemcpyAsync
(
to_pointer
(
args
[
3
]),
rocblas_int
m
=
output_shape
.
lens
()[
0
];
to_pointer
(
args
[
2
]),
rocblas_int
n
=
output_shape
.
lens
()[
1
];
output_shape
.
bytes
(),
rocblas_int
k
=
args
[
0
].
get_shape
().
lens
()[
1
];
hipMemcpyDeviceToDevice
,
ctx
.
get_stream
().
get
());
});
beta
=
op
.
beta
;
}
auto
a_lens
=
args
[
0
].
get_shape
().
lens
();
auto
b_lens
=
args
[
1
].
get_shape
().
lens
();
output_shape
.
visit_type
([
&
](
auto
as
)
{
output_shape
.
visit_type
([
&
](
auto
as
)
{
auto
alpha_r
=
to_rocblas_type
(
as
(
alpha
));
auto
n_dim
=
output_shape
.
lens
().
size
();
auto
beta_r
=
to_rocblas_type
(
as
(
beta
));
auto
dim_1
=
n_dim
-
1
;
auto
dim_0
=
n_dim
-
2
;
auto
alpha_r
=
to_rocblas_type
(
as
(
op
.
alpha
));
auto
beta_r
=
to_rocblas_type
(
as
(
beta
));
bool
transa
=
args
[
0
].
get_shape
().
transposed
();
bool
transb
=
args
[
1
].
get_shape
().
transposed
();
rocblas_int
lda
=
args
[
0
].
get_shape
().
strides
()[
transa
?
dim_1
:
dim_0
];
rocblas_int
ldb
=
args
[
1
].
get_shape
().
strides
()[
transb
?
dim_1
:
dim_0
];
rocblas_int
ldc
=
args
[
2
].
get_shape
().
strides
()[
dim_0
];
auto
out_lens
=
output_shape
.
lens
();
rocblas_int
m
=
out_lens
[
dim_0
];
rocblas_int
n
=
out_lens
[
dim_1
];
rocblas_int
k
=
args
[
0
].
get_shape
().
lens
()[
dim_1
];
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
generic_rocblas_gemm
(
as
,
if
(
num_matrices
==
1
)
ctx
.
get_stream
().
get_rocblas
(),
{
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
generic_rocblas_gemm
(
as
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
ctx
.
get_stream
().
get_rocblas
(),
n
,
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
m
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
k
,
n
,
&
alpha_r
,
m
,
to_pointer
(
args
[
1
]),
k
,
ldb
,
&
alpha_r
,
to_pointer
(
args
[
0
]),
to_pointer
(
args
[
1
]),
lda
,
ldb
,
&
beta_r
,
to_pointer
(
args
[
0
]),
to_pointer
(
args
[
2
]),
lda
,
ldc
);
&
beta_r
,
(
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
])),
ldc
);
}
else
{
generic_rocblas_batched_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
n
,
m
,
k
,
&
alpha_r
,
to_pointer
(
args
[
1
]),
ldb
,
k
*
n
,
to_pointer
(
args
[
0
]),
lda
,
m
*
k
,
&
beta_r
,
(
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
])),
ldc
,
m
*
n
,
num_matrices
);
}
});
});
return
args
[
2
];
return
(
is_3inputs
?
args
[
3
]
:
args
[
2
]
)
;
}
}
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/hip.cpp
View file @
eb0d8fee
...
@@ -43,6 +43,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
...
@@ -43,6 +43,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
template
<
class
T
>
template
<
class
T
>
std
::
vector
<
T
>
read_from_gpu
(
const
void
*
x
,
std
::
size_t
sz
)
std
::
vector
<
T
>
read_from_gpu
(
const
void
*
x
,
std
::
size_t
sz
)
{
{
gpu_sync
();
std
::
vector
<
T
>
result
(
sz
);
std
::
vector
<
T
>
result
(
sz
);
auto
status
=
hipMemcpy
(
result
.
data
(),
x
,
sz
*
sizeof
(
T
),
hipMemcpyDeviceToHost
);
auto
status
=
hipMemcpy
(
result
.
data
(),
x
,
sz
*
sizeof
(
T
),
hipMemcpyDeviceToHost
);
if
(
status
!=
hipSuccess
)
if
(
status
!=
hipSuccess
)
...
@@ -52,6 +53,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
...
@@ -52,6 +53,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
{
{
gpu_sync
();
auto
result
=
allocate_gpu
(
sz
,
host
);
auto
result
=
allocate_gpu
(
sz
,
host
);
auto
status
=
hipMemcpy
(
result
.
get
(),
x
,
sz
,
hipMemcpyHostToDevice
);
auto
status
=
hipMemcpy
(
result
.
get
(),
x
,
sz
,
hipMemcpyHostToDevice
);
if
(
status
!=
hipSuccess
)
if
(
status
!=
hipSuccess
)
...
...
src/targets/gpu/include/migraphx/gpu/abs.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_ABS_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_ABS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ABS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ABS_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
struct
miopen_abs
struct
miopen_abs
{
{
shared
<
activation_descriptor
>
ad
;
shared
<
activation_descriptor
>
ad
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
gpu
::
reflect
(
self
.
ad
.
get
(),
f
);
}
std
::
string
name
()
const
{
return
"gpu::abs"
;
}
std
::
string
name
()
const
{
return
"gpu::abs"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/acos.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/include/migraphx/gpu/add.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_ADD_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_ADD_HPP
#define MIGRAPHX_GUARD_RTGLIB_ADD_HPP
#define MIGRAPHX_GUARD_RTGLIB_ADD_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp
0 → 100644
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_ADJUST_ALLOCATION_HPP
#define MIGRAPHX_GUARD_RTGLIB_ADJUST_ALLOCATION_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
adjust_allocation
{
std
::
string
name
()
const
{
return
"gpu::adjust_allocation"
;
}
void
apply
(
program
&
p
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/asin.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/include/migraphx/gpu/atan.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#define MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#define MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
struct
miopen_batch_norm_inference
struct
miopen_batch_norm_inference
{
{
op
::
batch_norm_inference
op
;
op
::
batch_norm_inference
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::batch_norm_inference"
;
}
std
::
string
name
()
const
{
return
"gpu::batch_norm_inference"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/clip.hpp
0 → 100644
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_clip
{
op
::
clip
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::clip"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/concat.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/op/concat.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
struct
hip_concat
struct
hip_concat
{
{
op
::
concat
op
;
op
::
concat
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::concat"
;
}
std
::
string
name
()
const
{
return
"gpu::concat"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
eb0d8fee
...
@@ -11,13 +11,19 @@ namespace migraphx {
...
@@ -11,13 +11,19 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_NULL_STREAM
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NULL_STREAM
)
using
hip_event_ptr
=
MIGRAPHX_MANAGE_PTR
(
hipEvent_t
,
hipEventDestroy
);
struct
hip_device
struct
hip_device
{
{
hip_device
()
{
add_stream
();
}
hip_device
()
{
add_stream
();
}
hip_device
(
std
::
size_t
id
)
:
device_id
(
id
)
{
add_stream
();
}
hip_device
(
std
::
size_t
id
,
std
::
size_t
n
)
:
device_id
(
id
)
{
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
add_stream
();
}
struct
stream
struct
stream
{
{
...
@@ -32,7 +38,7 @@ struct hip_device
...
@@ -32,7 +38,7 @@ struct hip_device
static
hip_stream_ptr
create_stream
()
static
hip_stream_ptr
create_stream
()
{
{
hipStream_t
result
=
nullptr
;
hipStream_t
result
=
nullptr
;
auto
status
=
hipStreamCreate
(
&
result
);
auto
status
=
hipStreamCreate
WithFlags
(
&
result
,
hipStreamNonBlocking
);
if
(
status
!=
hipSuccess
)
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to allocate stream"
);
MIGRAPHX_THROW
(
"Failed to allocate stream"
);
return
hip_stream_ptr
{
result
};
return
hip_stream_ptr
{
result
};
...
@@ -40,7 +46,7 @@ struct hip_device
...
@@ -40,7 +46,7 @@ struct hip_device
hipStream_t
get
()
hipStream_t
get
()
{
{
if
(
enabled
(
MIGRAPHX_
DIS
ABLE_NULL_STREAM
{}))
if
(
not
enabled
(
MIGRAPHX_
EN
ABLE_NULL_STREAM
{}))
{
{
setup
();
setup
();
if
(
s
==
nullptr
)
if
(
s
==
nullptr
)
...
@@ -53,7 +59,7 @@ struct hip_device
...
@@ -53,7 +59,7 @@ struct hip_device
auto
create_miopen_handle
()
auto
create_miopen_handle
()
{
{
if
(
enabled
(
MIGRAPHX_
DIS
ABLE_NULL_STREAM
{}))
if
(
not
enabled
(
MIGRAPHX_
EN
ABLE_NULL_STREAM
{}))
return
make_obj
<
miopen_handle
>
(
&
miopenCreateWithStream
,
get
());
return
make_obj
<
miopen_handle
>
(
&
miopenCreateWithStream
,
get
());
else
else
return
make_obj
<
miopen_handle
>
(
&
miopenCreate
);
return
make_obj
<
miopen_handle
>
(
&
miopenCreate
);
...
@@ -77,6 +83,22 @@ struct hip_device
...
@@ -77,6 +83,22 @@ struct hip_device
return
rbhandle
.
get
();
return
rbhandle
.
get
();
}
}
void
wait
(
hipEvent_t
event
)
{
setup
();
auto
status
=
hipStreamWaitEvent
(
get
(),
event
,
0
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to wait."
);
}
void
record
(
hipEvent_t
event
)
{
setup
();
auto
status
=
hipEventRecord
(
event
,
get
());
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to record."
);
}
private:
private:
std
::
size_t
id
=
0
;
std
::
size_t
id
=
0
;
shared
<
hip_stream_ptr
>
s
=
nullptr
;
shared
<
hip_stream_ptr
>
s
=
nullptr
;
...
@@ -88,8 +110,14 @@ struct hip_device
...
@@ -88,8 +110,14 @@ struct hip_device
stream
&
get_stream
()
{
return
streams
.
at
(
current_stream
);
}
stream
&
get_stream
()
{
return
streams
.
at
(
current_stream
);
}
stream
&
get_stream
(
std
::
size_t
n
)
{
return
streams
.
at
(
n
);
}
void
set_stream
(
std
::
size_t
n
)
{
current_stream
=
n
;
}
void
set_stream
(
std
::
size_t
n
)
{
current_stream
=
n
;
}
std
::
size_t
nstreams
()
const
{
return
streams
.
size
();
}
std
::
size_t
stream_id
()
const
{
return
current_stream
;
}
private:
private:
std
::
size_t
device_id
=
0
;
std
::
size_t
device_id
=
0
;
std
::
size_t
current_stream
=
0
;
std
::
size_t
current_stream
=
0
;
...
@@ -98,7 +126,10 @@ struct hip_device
...
@@ -98,7 +126,10 @@ struct hip_device
struct
context
struct
context
{
{
context
(
std
::
size_t
n
=
0
)
:
current_device
(
std
::
make_shared
<
hip_device
>
(
n
))
{}
context
(
std
::
size_t
device_id
=
0
,
std
::
size_t
n
=
4
)
:
current_device
(
std
::
make_shared
<
hip_device
>
(
device_id
,
n
))
{
}
hip_device
&
get_current_device
()
hip_device
&
get_current_device
()
{
{
...
@@ -107,13 +138,34 @@ struct context
...
@@ -107,13 +138,34 @@ struct context
}
}
hip_device
::
stream
&
get_stream
()
{
return
get_current_device
().
get_stream
();
}
hip_device
::
stream
&
get_stream
()
{
return
get_current_device
().
get_stream
();
}
hip_device
::
stream
&
get_stream
(
std
::
size_t
n
)
{
return
get_current_device
().
get_stream
(
n
);
}
void
set_stream
(
std
::
size_t
n
)
{
get_current_device
().
set_stream
(
n
);
}
void
create_events
(
std
::
size_t
num_of_events
)
{
for
(
std
::
size_t
i
=
events
.
size
();
i
<
num_of_events
+
1
;
++
i
)
events
.
emplace_back
(
create_event
());
}
hipEvent_t
get_event
(
std
::
size_t
i
)
const
{
return
events
.
at
(
i
).
get
();
}
std
::
vector
<
argument
>
literals
{};
std
::
vector
<
argument
>
literals
{};
void
finish
()
const
{
gpu_sync
();
}
void
finish
()
const
{
gpu_sync
();
}
static
hip_event_ptr
create_event
()
{
hipEvent_t
event
;
auto
status
=
hipEventCreateWithFlags
(
&
event
,
hipEventDisableTiming
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to create event"
);
return
hip_event_ptr
{
event
};
}
private:
private:
// TODO: Make this a vector to support multiple devices
// TODO: Make this a vector to support multiple devices
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
};
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
View file @
eb0d8fee
#ifndef MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/op/contiguous.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
struct
miopen_contiguous
struct
miopen_contiguous
{
{
op
::
contiguous
op
;
op
::
contiguous
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::contiguous"
;
}
std
::
string
name
()
const
{
return
"gpu::contiguous"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
,
shape
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
argument
compute
(
context
&
,
shape
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment