Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
33a41ba0
Commit
33a41ba0
authored
Jun 14, 2019
by
Paul
Browse files
Merge branch 'develop' into batch-concat
parents
b092d017
a7bd5ded
Changes
210
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
595 additions
and
106 deletions
+595
-106
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+5
-0
src/targets/gpu/abs.cpp
src/targets/gpu/abs.cpp
+2
-2
src/targets/gpu/adjust_allocation.cpp
src/targets/gpu/adjust_allocation.cpp
+37
-0
src/targets/gpu/clip.cpp
src/targets/gpu/clip.cpp
+23
-0
src/targets/gpu/device/clip.cpp
src/targets/gpu/device/clip.cpp
+22
-0
src/targets/gpu/device/convert.cpp
src/targets/gpu/device/convert.cpp
+24
-0
src/targets/gpu/device/gather.cpp
src/targets/gpu/device/gather.cpp
+1
-1
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+42
-33
src/targets/gpu/device/softmax.cpp
src/targets/gpu/device/softmax.cpp
+83
-0
src/targets/gpu/eliminate_workspace.cpp
src/targets/gpu/eliminate_workspace.cpp
+0
-1
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+34
-5
src/targets/gpu/gemm.cpp
src/targets/gpu/gemm.cpp
+172
-55
src/targets/gpu/include/migraphx/gpu/abs.hpp
src/targets/gpu/include/migraphx/gpu/abs.hpp
+11
-1
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp
+23
-0
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
+12
-2
src/targets/gpu/include/migraphx/gpu/clip.hpp
src/targets/gpu/include/migraphx/gpu/clip.hpp
+37
-0
src/targets/gpu/include/migraphx/gpu/concat.hpp
src/targets/gpu/include/migraphx/gpu/concat.hpp
+11
-2
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+12
-2
src/targets/gpu/include/migraphx/gpu/convert.hpp
src/targets/gpu/include/migraphx/gpu/convert.hpp
+39
-0
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+5
-2
No files found.
src/targets/gpu/CMakeLists.txt
View file @
33a41ba0
...
...
@@ -27,11 +27,14 @@ add_library(migraphx_device
device/add_relu.cpp
device/contiguous.cpp
device/logsoftmax.cpp
device/softmax.cpp
device/convert.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
device/gather.cpp
device/sub.cpp
device/clip.cpp
)
set_target_properties
(
migraphx_device PROPERTIES EXPORT_NAME device
)
rocm_clang_tidy_check
(
migraphx_device
)
...
...
@@ -65,6 +68,8 @@ add_library(migraphx_gpu
gather.cpp
lrn.cpp
schedule_model.cpp
adjust_allocation.cpp
clip.cpp
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/abs.cpp
View file @
33a41ba0
...
...
@@ -7,8 +7,8 @@ namespace gpu {
shape
miopen_abs
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
not_broadcast
ed
();
return
inputs
.
at
(
1
);
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
pack
ed
();
return
inputs
.
at
(
0
);
}
argument
miopen_abs
::
compute
(
context
&
ctx
,
...
...
src/targets/gpu/adjust_allocation.cpp
0 → 100644
View file @
33a41ba0
#include <migraphx/gpu/adjust_allocation.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/iterator_for.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
void
adjust_allocation
::
apply
(
program
&
p
)
const
{
for
(
auto
ins
:
iterator_for
(
p
))
{
// skip instruction with no input
if
(
ins
->
inputs
().
empty
())
continue
;
if
(
ins
->
name
()
==
"load"
)
continue
;
auto
alias_ins
=
instruction
::
get_output_alias
(
ins
,
true
);
if
(
alias_ins
->
name
()
==
"hip::allocate"
)
{
// shape allocated is different from actual shape
// of the instruction, reallocate and replace the previous one
if
(
alias_ins
->
get_shape
()
!=
ins
->
get_shape
())
{
auto
alloc_ins
=
p
.
insert_instruction
(
ins
,
hip_allocate
{
ins
->
get_shape
()});
p
.
replace_instruction
(
alias_ins
,
alloc_ins
);
}
}
}
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/clip.cpp
0 → 100644
View file @
33a41ba0
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_clip
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
inputs
.
pop_back
();
return
op
.
compute_shape
(
inputs
);
}
argument
hip_clip
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
clip
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
op
.
max_val
,
op
.
min_val
);
return
args
.
back
();
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/clip.cpp
0 → 100644
View file @
33a41ba0
#include <migraphx/gpu/device/clip.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
clip
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
float
max
,
const
float
min
)
{
nary
(
stream
,
result
,
arg1
)(
[
max
,
min
](
auto
x
)
{
return
std
::
min
<
decltype
(
x
)
>
(
std
::
max
<
decltype
(
x
)
>
(
min
,
x
),
max
);
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/convert.cpp
0 → 100644
View file @
33a41ba0
#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
convert
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
result
.
visit
([
&
](
auto
output
)
{
arg
.
visit
([
&
](
auto
input
)
{
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
gs_launch
(
stream
,
result
.
get_shape
().
elements
())([
=
](
auto
i
)
{
output_ptr
[
i
]
=
input_ptr
[
i
];
});
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/gather.cpp
View file @
33a41ba0
...
...
@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
int
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
get_shape
().
lens
().
size
())
:
axis
;
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
get_shape
().
lens
().
size
())
:
axis
;
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
output_shape
.
elements
();
args
[
1
].
visit
([
&
](
auto
indices
)
{
...
...
src/targets/gpu/device/logsoftmax.cpp
View file @
33a41ba0
...
...
@@ -17,47 +17,56 @@ argument logsoftmax(hipStream_t stream,
int
axis
)
{
auto
lens
=
output_shape
.
lens
();
std
::
size_t
batch_size
=
std
::
accumulate
(
lens
.
begin
(),
lens
.
begin
()
+
axis
,
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
std
::
size_t
n_dims
=
std
::
accumulate
(
lens
.
begin
()
+
axis
,
lens
.
end
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
migraphx
::
shape
comp_shape
{
output_shape
.
type
(),
{
batch_size
,
n_dims
}};
auto
lens
=
output_shape
.
lens
();
auto
num_in_batch
=
lens
[
axis
];
auto
batch_lens
=
lens
;
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
output_shape
.
type
(),
batch_lens
};
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
visit_tensor_size
(
batch_shape
.
lens
().
size
(),
[
&
](
auto
n_dim
)
{
hip_tensor_descriptor
<
n_dim
>
desc_batch
(
batch_shape
);
hip_tensor_descriptor
<
n_dim
>
desc_data
(
output_shape
);
// each thread is for one item in the batch
gs_launch
(
stream
,
batch_size
)([
=
](
auto
i
)
{
std
::
size_t
row_start
=
i
*
n_dims
;
// get max
auto
batch_max
=
input_ptr
[
row_start
];
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
batch_max
=
std
::
max
(
to_hip_type
(
batch_max
),
to_hip_type
(
input_ptr
[
ind
]));
}
// each thread is for one item in the batch
gs_launch
(
stream
,
batch_shape
.
elements
())([
=
](
auto
i
)
{
auto
batch_idx
=
desc_batch
.
multi
(
i
);
auto
data_idx
=
batch_idx
;
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
output_ptr
[
ind
]
=
input_ptr
[
ind
]
-
batch_max
;
}
// get max
auto
batch_max
=
input_ptr
[
desc_data
.
linear
(
batch_idx
)];
for
(
std
::
size_t
j
=
1
;
j
<
num_in_batch
;
++
j
)
{
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
batch_max
=
std
::
max
(
to_hip_type
(
batch_max
),
to_hip_type
(
input_ptr
[
idx
]));
}
auto
batch_sum
=
::
exp
(
to_hip_type
(
output_ptr
[
row_start
]));
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
batch_sum
+=
::
exp
(
to_hip_type
(
output_ptr
[
ind
]));
}
batch_sum
=
::
log
(
to_hip_type
(
batch_sum
));
for
(
std
::
size_t
j
=
0
;
j
<
num_in_batch
;
++
j
)
{
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
output_ptr
[
idx
]
=
input_ptr
[
idx
]
-
batch_max
;
}
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
output_ptr
[
ind
]
-=
batch_sum
;
}
auto
batch_sum
=
::
exp
(
to_hip_type
(
output_ptr
[
desc_data
.
linear
(
batch_idx
)]));
for
(
std
::
size_t
j
=
1
;
j
<
num_in_batch
;
++
j
)
{
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
batch_sum
+=
::
exp
(
to_hip_type
(
output_ptr
[
idx
]));
}
batch_sum
=
::
log
(
to_hip_type
(
batch_sum
));
for
(
std
::
size_t
j
=
0
;
j
<
num_in_batch
;
++
j
)
{
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
output_ptr
[
idx
]
-=
batch_sum
;
}
});
});
});
...
...
src/targets/gpu/device/softmax.cpp
0 → 100644
View file @
33a41ba0
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/softmax.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
argument
softmax
(
hipStream_t
stream
,
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
auto
lens
=
output_shape
.
lens
();
auto
batch_lens
=
lens
;
size_t
n_dims
=
lens
[
axis
];
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
visit_tensor_size
(
batch_shape
.
lens
().
size
(),
[
&
](
auto
n_dim
)
{
hip_tensor_descriptor
<
n_dim
>
desc_batch
(
batch_shape
);
hip_tensor_descriptor
<
n_dim
>
desc_data
(
output_shape
);
// each thread is for one item in the batch
gs_launch
(
stream
,
batch_shape
.
elements
())([
=
](
auto
i
)
{
auto
batch_idx
=
desc_batch
.
multi
(
i
);
auto
data_idx
=
batch_idx
;
// get max
auto
batch_max
=
input_ptr
[
desc_data
.
linear
(
batch_idx
)];
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
data_idx
[
axis
]
=
j
;
batch_max
=
std
::
max
(
to_hip_type
(
batch_max
),
to_hip_type
(
input_ptr
[
desc_data
.
linear
(
data_idx
)]));
}
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
data_idx
[
axis
]
=
j
;
auto
idx
=
desc_data
.
linear
(
data_idx
);
output_ptr
[
idx
]
=
input_ptr
[
idx
]
-
batch_max
;
}
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
data_idx
[
axis
]
=
j
;
auto
idx
=
desc_data
.
linear
(
data_idx
);
output_ptr
[
idx
]
=
exp
(
to_hip_type
(
output_ptr
[
idx
]));
}
auto
batch_sum
=
output_ptr
[
desc_data
.
linear
(
batch_idx
)];
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
data_idx
[
axis
]
=
j
;
batch_sum
+=
output_ptr
[
desc_data
.
linear
(
data_idx
)];
}
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
data_idx
[
axis
]
=
j
;
auto
idx
=
desc_data
.
linear
(
data_idx
);
output_ptr
[
idx
]
=
output_ptr
[
idx
]
/
batch_sum
;
}
});
});
});
return
args
.
back
();
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/eliminate_workspace.cpp
View file @
33a41ba0
...
...
@@ -2,7 +2,6 @@
#include <migraphx/gpu/hip.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
...
...
src/targets/gpu/fuse_ops.cpp
View file @
33a41ba0
...
...
@@ -140,6 +140,8 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
auto
conv
=
any_cast
<
miopen_convolution
>
(
ins
->
get_operator
());
if
(
conv
.
op
.
group
>
1
)
return
false
;
if
(
conv
.
op
.
padding_mode
!=
op
::
padding_mode_t
::
default_
)
return
false
;
if
(
wei
.
lens
()[
1
]
>
512
and
conv
.
algo
!=
miopenConvolutionFwdAlgoWinograd
)
return
false
;
auto
op
=
conv
.
op
;
...
...
@@ -160,7 +162,10 @@ struct hip_triadd
device
::
add
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
return
args
.
at
(
3
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
hip_triadd_relu
...
...
@@ -176,7 +181,10 @@ struct hip_triadd_relu
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
return
args
.
at
(
3
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
hip_add_relu
...
...
@@ -192,7 +200,10 @@ struct hip_add_relu
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
2
),
args
.
at
(
0
),
args
.
at
(
1
));
return
args
.
at
(
2
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
find_add_relu
...
...
@@ -251,6 +262,12 @@ struct miopen_conv_bias
fusion
::
op_t
conv
;
fusion
::
op_t
bias
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
op
::
convolution
::
reflect
(
self
.
op
,
f
);
}
miopen_conv_bias
(
op
::
convolution
c
,
const
shape
&
input
,
const
shape
&
weights
,
const
shape
&
b
)
:
op
(
c
),
f
(
input
)
{
...
...
@@ -277,7 +294,10 @@ struct miopen_conv_bias
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
miopen_conv_bias_relu
...
...
@@ -288,6 +308,12 @@ struct miopen_conv_bias_relu
fusion
::
op_t
bias
;
fusion
::
op_t
relu
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
op
::
convolution
::
reflect
(
self
.
op
,
f
);
}
miopen_conv_bias_relu
(
op
::
convolution
c
,
const
shape
&
input
,
const
shape
&
weights
,
...
...
@@ -318,7 +344,10 @@ struct miopen_conv_bias_relu
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
template
<
class
...
Ms
>
...
...
src/targets/gpu/gemm.cpp
View file @
33a41ba0
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/add.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
template
<
class
...
Ts
>
void
generic_rocblas_
batched_gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_
scal
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
rocblas_s
gemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_s
scal
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
void
generic_rocblas_
batched_gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_
scal
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
rocblas_dgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_dscal
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_scal
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_SCAL: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
{
return
rocblas_haxpy
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_saxpy
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_daxpy
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_axpy
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_AXPY: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_dot
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sdot
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
void
generic_rocblas_
batched_gemm
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_
dot
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
r
ocblas_hgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
r
eturn
rocblas_ddot
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
void
generic_rocblas_batched_gemm
(
shape
::
as
<
T
>
,
Ts
&&
...)
rocblas_status
generic_rocblas_dot
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_DOT: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_gemv
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sgemv
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_gemv
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_dgemv
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_gemv
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_GEMMV: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
return
rocblas_sgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
return
rocblas_dgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
{
return
rocblas_hgemm_strided_batched
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
rocblas_status
generic_rocblas_batched_gemm
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_BATCHED_GEMM: type unsupported by rocblas"
);
}
template
<
class
...
Ts
>
void
generic_rocblas_gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
float
>
,
Ts
&&
...
xs
)
{
rocblas_sgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_sgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
void
generic_rocblas_gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
double
>
,
Ts
&&
...
xs
)
{
rocblas_dgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_dgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
...
Ts
>
void
generic_rocblas_gemm
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
half
>
,
Ts
&&
...
xs
)
{
rocblas_hgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
return
rocblas_hgemm
(
std
::
forward
<
Ts
>
(
xs
)...);
}
template
<
class
T
,
class
...
Ts
>
void
generic_rocblas_gemm
(
shape
::
as
<
T
>
,
Ts
&&
...)
rocblas_status
generic_rocblas_gemm
(
shape
::
as
<
T
>
,
Ts
&&
...)
{
MIGRAPHX_THROW
(
"GENERIC_ROCBLAS_GEMM: type unsupported by rocblas"
);
}
...
...
@@ -90,56 +169,94 @@ rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_hal
shape
miopen_gemm
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
return
op
.
compute_shape
({
inputs
.
at
(
0
),
inputs
.
at
(
1
)});
std
::
vector
<
shape
>
input_shapes
(
inputs
.
begin
(),
inputs
.
begin
()
+
inputs
.
size
()
-
1
);
check_shapes
{
input_shapes
}.
not_broadcasted
();
return
op
.
compute_shape
(
input_shapes
);
}
argument
miopen_gemm
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
float
alpha
=
1.0
f
;
float
beta
=
0.0
f
;
bool
transa
=
args
[
0
].
get_shape
().
transposed
();
bool
transb
=
args
[
1
].
get_shape
().
transposed
();
std
::
size_t
n_dims
=
args
[
0
].
get_shape
().
lens
().
size
();
std
::
size_t
dim_0
=
n_dims
-
2
;
std
::
size_t
dim_1
=
n_dims
-
1
;
rocblas_int
lda
=
args
[
0
].
get_shape
().
strides
()[
transa
?
dim_1
:
dim_0
];
rocblas_int
ldb
=
args
[
1
].
get_shape
().
strides
()[
transb
?
dim_1
:
dim_0
];
rocblas_int
ldc
=
args
[
2
].
get_shape
().
strides
()[
dim_0
];
auto
out_lens
=
output_shape
.
lens
();
rocblas_int
m
=
out_lens
[
dim_0
];
rocblas_int
n
=
out_lens
[
dim_1
];
rocblas_int
k
=
args
[
0
].
get_shape
().
lens
()[
dim_1
];
auto
batch_num
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
bool
is_3inputs
=
(
args
.
size
()
==
4
);
float
beta
=
0.0
f
;
if
(
is_3inputs
)
{
output_shape
.
visit_type
([
&
](
auto
as
)
{
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
hipMemcpyAsync
(
to_pointer
(
args
[
3
]),
to_pointer
(
args
[
2
]),
output_shape
.
bytes
(),
hipMemcpyDeviceToDevice
,
ctx
.
get_stream
().
get
());
});
beta
=
op
.
beta
;
}
auto
a_lens
=
args
[
0
].
get_shape
().
lens
();
auto
b_lens
=
args
[
1
].
get_shape
().
lens
();
output_shape
.
visit_type
([
&
](
auto
as
)
{
auto
alpha_r
=
to_rocblas_type
(
as
(
alpha
));
auto
beta_r
=
to_rocblas_type
(
as
(
beta
));
auto
n_dim
=
output_shape
.
lens
().
size
();
auto
dim_1
=
n_dim
-
1
;
auto
dim_0
=
n_dim
-
2
;
auto
alpha_r
=
to_rocblas_type
(
as
(
op
.
alpha
));
auto
beta_r
=
to_rocblas_type
(
as
(
beta
));
bool
transa
=
args
[
0
].
get_shape
().
transposed
();
bool
transb
=
args
[
1
].
get_shape
().
transposed
();
rocblas_int
lda
=
args
[
0
].
get_shape
().
strides
()[
transa
?
dim_1
:
dim_0
];
rocblas_int
ldb
=
args
[
1
].
get_shape
().
strides
()[
transb
?
dim_1
:
dim_0
];
rocblas_int
ldc
=
args
[
2
].
get_shape
().
strides
()[
dim_0
];
auto
out_lens
=
output_shape
.
lens
();
rocblas_int
m
=
out_lens
[
dim_0
];
rocblas_int
n
=
out_lens
[
dim_1
];
rocblas_int
k
=
args
[
0
].
get_shape
().
lens
()[
dim_1
];
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
generic_rocblas_batched_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
n
,
m
,
k
,
&
alpha_r
,
to_pointer
(
args
[
1
]),
ldb
,
k
*
n
,
to_pointer
(
args
[
0
]),
lda
,
m
*
k
,
&
beta_r
,
to_pointer
(
args
[
2
]),
ldc
,
m
*
n
,
batch_num
);
if
(
num_matrices
==
1
)
{
generic_rocblas_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
n
,
m
,
k
,
&
alpha_r
,
to_pointer
(
args
[
1
]),
ldb
,
to_pointer
(
args
[
0
]),
lda
,
&
beta_r
,
(
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
])),
ldc
);
}
else
{
generic_rocblas_batched_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
n
,
m
,
k
,
&
alpha_r
,
to_pointer
(
args
[
1
]),
ldb
,
k
*
n
,
to_pointer
(
args
[
0
]),
lda
,
m
*
k
,
&
beta_r
,
(
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
])),
ldc
,
m
*
n
,
num_matrices
);
}
});
return
args
[
2
];
return
(
is_3inputs
?
args
[
3
]
:
args
[
2
]
)
;
}
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/abs.hpp
View file @
33a41ba0
...
...
@@ -13,11 +13,21 @@ struct context;
struct
miopen_abs
{
shared
<
activation_descriptor
>
ad
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
gpu
::
reflect
(
self
.
ad
.
get
(),
f
);
}
std
::
string
name
()
const
{
return
"gpu::abs"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/adjust_allocation.hpp
0 → 100644
View file @
33a41ba0
#ifndef MIGRAPHX_GUARD_RTGLIB_ADJUST_ALLOCATION_HPP
#define MIGRAPHX_GUARD_RTGLIB_ADJUST_ALLOCATION_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
adjust_allocation
{
std
::
string
name
()
const
{
return
"gpu::adjust_allocation"
;
}
void
apply
(
program
&
p
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
View file @
33a41ba0
...
...
@@ -2,7 +2,7 @@
#define MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op
erat
or
s
.hpp>
#include <migraphx/op
/batch_n
or
m
.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -13,11 +13,21 @@ struct context;
struct
miopen_batch_norm_inference
{
op
::
batch_norm_inference
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::batch_norm_inference"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/clip.hpp
0 → 100644
View file @
33a41ba0
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_clip
{
op
::
clip
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::clip"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/concat.hpp
View file @
33a41ba0
...
...
@@ -2,7 +2,7 @@
#define MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op
erators
.hpp>
#include <migraphx/op
/concat
.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -14,11 +14,20 @@ struct hip_concat
{
op
::
concat
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::concat"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
View file @
33a41ba0
...
...
@@ -2,7 +2,7 @@
#define MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op
erator
s.hpp>
#include <migraphx/op
/contiguou
s.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -13,10 +13,20 @@ struct context;
struct
miopen_contiguous
{
op
::
contiguous
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::contiguous"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
,
shape
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/convert.hpp
0 → 100644
View file @
33a41ba0
#ifndef MIGRAPHX_GUARD_RTGLIB_CONVERT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVERT_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_convert
:
unary_device
<
hip_convert
,
device
::
convert
>
{
op
::
convert
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
hip_convert
(
op
::
convert
oper
)
:
op
(
oper
)
{}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
inputs
.
pop_back
();
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
33a41ba0
...
...
@@ -2,7 +2,7 @@
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op
erators
.hpp>
#include <migraphx/op
/convolution
.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
...
...
@@ -31,7 +31,10 @@ struct miopen_convolution
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
...
...
Prev
1
…
3
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment