Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
94e3a2e4
Commit
94e3a2e4
authored
Feb 12, 2022
by
Shucai Xiao
Browse files
change size_t to int
parent
26bd92d8
Changes
256
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
67 additions
and
67 deletions
+67
-67
src/targets/gpu/device/nonzero.cpp
src/targets/gpu/device/nonzero.cpp
+1
-1
src/targets/gpu/device/pad.cpp
src/targets/gpu/device/pad.cpp
+2
-2
src/targets/gpu/device/reduce_mean.cpp
src/targets/gpu/device/reduce_mean.cpp
+1
-1
src/targets/gpu/device/reverse.cpp
src/targets/gpu/device/reverse.cpp
+2
-2
src/targets/gpu/device/topk.cpp
src/targets/gpu/device/topk.cpp
+1
-1
src/targets/gpu/driver/parser.cpp
src/targets/gpu/driver/parser.cpp
+2
-2
src/targets/gpu/eliminate_workspace.cpp
src/targets/gpu/eliminate_workspace.cpp
+1
-1
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+3
-3
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+1
-1
src/targets/gpu/hip.cpp
src/targets/gpu/hip.cpp
+10
-10
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
+2
-2
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp
+2
-2
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp
...gets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp
+2
-2
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+23
-23
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
+3
-3
src/targets/gpu/include/migraphx/gpu/device/concat.hpp
src/targets/gpu/include/migraphx/gpu/device/concat.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/gemm.hpp
src/targets/gpu/include/migraphx/gpu/gemm.hpp
+2
-2
src/targets/gpu/include/migraphx/gpu/hip.hpp
src/targets/gpu/include/migraphx/gpu/hip.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/kernel.hpp
src/targets/gpu/include/migraphx/gpu/kernel.hpp
+5
-5
src/targets/gpu/include/migraphx/gpu/oper.hpp
src/targets/gpu/include/migraphx/gpu/oper.hpp
+2
-2
No files found.
src/targets/gpu/device/nonzero.cpp
View file @
94e3a2e4
...
...
@@ -36,7 +36,7 @@ argument nonzero(hipStream_t stream, const argument& result, const argument& arg
return
;
auto
index
=
si
.
multi
(
j
);
for
(
size_
t
k
=
0
;
k
<
index
.
size
();
++
k
)
for
(
in
t
k
=
0
;
k
<
index
.
size
();
++
k
)
{
ptr
[
k
*
elem_num
+
out_loc
]
=
index
[
k
];
}
...
...
src/targets/gpu/device/pad.cpp
View file @
94e3a2e4
...
...
@@ -15,7 +15,7 @@ namespace device {
argument
pad
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
float
value
,
std
::
vector
<
std
::
int64_t
>
pads
)
{
std
::
size_
t
nelements
=
arg1
.
get_shape
().
elements
();
in
t
nelements
=
arg1
.
get_shape
().
elements
();
hip_visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input
)
{
using
type
=
typename
decltype
(
output
)
::
value_type
;
using
hip_index
=
typename
decltype
(
output
)
::
hip_index
;
...
...
@@ -27,7 +27,7 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
__device__
{
auto
idx
=
input
.
get_shape
().
multi
(
i
);
for
(
std
::
size_
t
j
=
0
;
j
<
offsets
.
size
();
j
++
)
for
(
in
t
j
=
0
;
j
<
offsets
.
size
();
j
++
)
{
idx
[
j
]
+=
offsets
[
j
];
}
...
...
src/targets/gpu/device/reduce_mean.cpp
View file @
94e3a2e4
...
...
@@ -9,7 +9,7 @@ namespace device {
void
reduce_mean
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
index_int
item_num
=
arg
.
get_shape
().
elements
()
/
result
.
get_shape
().
elements
();
reduce
(
stream
,
result
,
arg
,
sum
{},
0
,
id
{},
mean
{
item_num
});
reduce
(
stream
,
result
,
arg
,
sum
{},
0
,
id
{},
mean
{
static_cast
<
int
>
(
item_num
)
});
}
}
// namespace device
...
...
src/targets/gpu/device/reverse.cpp
View file @
94e3a2e4
...
...
@@ -16,9 +16,9 @@ reverse(hipStream_t stream, argument result, argument arg1, const std::vector<in
{
auto
s
=
arg1
.
get_shape
();
// auto lens = s.lens();
std
::
vector
<
std
::
size_
t
>
axis_len
(
axes
.
begin
(),
axes
.
end
());
std
::
vector
<
in
t
>
axis_len
(
axes
.
begin
(),
axes
.
end
());
shape
sa
{
shape
::
float_type
,
axis_len
};
std
::
size_
t
nelements
=
s
.
elements
();
in
t
nelements
=
s
.
elements
();
visit_all
(
result
,
arg1
)([
&
](
auto
output1
,
auto
input1
)
{
hip_visit_views
(
output1
,
input1
,
s
)([
&
](
auto
output
,
auto
input
,
auto
hs
)
{
hip_visit_views
(
sa
)([
&
](
auto
daxes
)
{
...
...
src/targets/gpu/device/topk.cpp
View file @
94e3a2e4
...
...
@@ -142,7 +142,7 @@ std::vector<argument> topk(hipStream_t stream,
auto
comp_lens
=
in_lens
;
comp_lens
[
axis
]
=
1
;
shape
comp_s
{
in_s
.
type
(),
comp_lens
};
std
::
size_
t
elem_num
=
comp_s
.
elements
();
in
t
elem_num
=
comp_s
.
elements
();
hip_visit_all
(
val_res
,
arg
,
out_s
,
in_s
,
comp_s
)(
[
&
](
auto
out_val
,
auto
input
,
auto
oss
,
auto
iss
,
auto
css
)
{
...
...
src/targets/gpu/driver/parser.cpp
View file @
94e3a2e4
...
...
@@ -15,8 +15,8 @@ namespace driver {
shape
parser
::
parse_shape
(
const
value
&
v
)
const
{
auto
lens
=
get
(
v
,
"lens"
,
std
::
vector
<
std
::
size_
t
>
{});
auto
strides
=
get
(
v
,
"strides"
,
std
::
vector
<
std
::
size_
t
>
{});
auto
lens
=
get
(
v
,
"lens"
,
std
::
vector
<
in
t
>
{});
auto
strides
=
get
(
v
,
"strides"
,
std
::
vector
<
in
t
>
{});
auto
type
=
shape
::
parse_type
(
get
<
std
::
string
>
(
v
,
"type"
,
"float"
));
if
(
strides
.
empty
())
return
shape
{
type
,
lens
};
...
...
src/targets/gpu/eliminate_workspace.cpp
View file @
94e3a2e4
...
...
@@ -13,7 +13,7 @@ namespace gpu {
void
eliminate_workspace
::
apply
(
module
&
p
)
const
{
std
::
size_
t
n
=
0
;
in
t
n
=
0
;
std
::
vector
<
instruction_ref
>
allocs
;
for
(
auto
ins
:
iterator_for
(
p
))
{
...
...
src/targets/gpu/fuse_ops.cpp
View file @
94e3a2e4
...
...
@@ -64,7 +64,7 @@ struct fusion
bool
empty
()
const
{
return
fp
==
nullptr
;
}
op_t
operator
[](
std
::
size_
t
i
)
const
op_t
operator
[](
in
t
i
)
const
{
assert
(
fp
);
op_t
result
;
...
...
@@ -118,7 +118,7 @@ struct fusion
{
// assert(fp);
// TODO: Use zero workspace for now
std
::
size_
t
ws_size
=
0
;
in
t
ws_size
=
0
;
// int algo_count = 1;
// miopenConvFwdAlgorithm_t algo;
// miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, &algo_count, &algo);
...
...
@@ -596,7 +596,7 @@ struct miopen_fusion
{
// Compensate for allocation
inputs
.
pop_back
();
std
::
size_
t
i
=
0
;
in
t
i
=
0
;
f
=
fusion
(
inputs
[
i
]);
i
++
;
std
::
vector
<
std
::
function
<
void
(
const
fused_operator_args
&
,
const
std
::
vector
<
argument
>&
)
>>
...
...
src/targets/gpu/gemm_impl.cpp
View file @
94e3a2e4
...
...
@@ -90,7 +90,7 @@ void gemm_impl(context& ctx,
}
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_
t
{
1
},
std
::
multiplies
<
std
::
size_
t
>
());
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
in
t
{
1
},
std
::
multiplies
<
in
t
>
());
if
(
num_matrices
==
1
)
{
// the rocblas_gemm API handles inputs and output matrices as
...
...
src/targets/gpu/hip.cpp
View file @
94e3a2e4
...
...
@@ -27,10 +27,10 @@ using hip_host_ptr = MIGRAPHX_MANAGE_PTR(void, hipHostUnregister);
std
::
string
hip_error
(
int
error
)
{
return
hipGetErrorString
(
static_cast
<
hipError_t
>
(
error
));
}
std
::
size_
t
get_available_gpu_memory
()
in
t
get_available_gpu_memory
()
{
size_t
free
;
size_t
total
;
std
::
size_t
free
;
std
::
size_t
total
;
auto
status
=
hipMemGetInfo
(
&
free
,
&
total
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed getting available memory: "
+
hip_error
(
status
));
...
...
@@ -46,7 +46,7 @@ void* get_device_ptr(void* hptr)
return
result
;
}
hip_ptr
allocate_gpu
(
std
::
size_
t
sz
,
bool
host
=
false
)
hip_ptr
allocate_gpu
(
in
t
sz
,
bool
host
=
false
)
{
if
(
sz
>
get_available_gpu_memory
())
MIGRAPHX_THROW
(
"Memory not available to allocate buffer: "
+
std
::
to_string
(
sz
));
...
...
@@ -62,7 +62,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
return
hip_ptr
{
result
};
}
hip_host_ptr
register_on_gpu
(
void
*
ptr
,
std
::
size_
t
sz
)
hip_host_ptr
register_on_gpu
(
void
*
ptr
,
in
t
sz
)
{
auto
status
=
hipHostRegister
(
ptr
,
sz
,
hipHostRegisterMapped
);
if
(
status
!=
hipSuccess
)
...
...
@@ -71,7 +71,7 @@ hip_host_ptr register_on_gpu(void* ptr, std::size_t sz)
}
template
<
class
T
>
std
::
vector
<
T
>
read_from_gpu
(
const
void
*
x
,
std
::
size_
t
sz
)
std
::
vector
<
T
>
read_from_gpu
(
const
void
*
x
,
in
t
sz
)
{
gpu_sync
();
std
::
vector
<
T
>
result
(
sz
);
...
...
@@ -81,7 +81,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return
result
;
}
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_
t
sz
,
bool
host
=
false
)
hip_ptr
write_to_gpu
(
const
void
*
x
,
in
t
sz
,
bool
host
=
false
)
{
gpu_sync
();
auto
result
=
allocate_gpu
(
sz
,
host
);
...
...
@@ -133,7 +133,7 @@ argument from_gpu(const argument& arg)
return
result
;
}
void
set_device
(
std
::
size_
t
id
)
void
set_device
(
in
t
id
)
{
auto
status
=
hipSetDevice
(
id
);
if
(
status
!=
hipSuccess
)
...
...
@@ -151,8 +151,8 @@ void gpu_sync(const context& ctx) { ctx.finish(); }
void
hip_async_copy
(
context
&
ctx
,
const
argument
&
src
,
const
argument
&
dst
,
hipMemcpyKind
kind
)
{
std
::
size_
t
src_size
=
src
.
get_shape
().
bytes
();
std
::
size_
t
dst_size
=
dst
.
get_shape
().
bytes
();
in
t
src_size
=
src
.
get_shape
().
bytes
();
in
t
dst_size
=
dst
.
get_shape
().
bytes
();
if
(
src_size
>
dst_size
)
MIGRAPHX_THROW
(
"Not enough memory available in destination to do copy"
);
auto
status
=
hipMemcpyAsync
(
dst
.
data
(),
src
.
data
(),
src_size
,
kind
,
ctx
.
get_stream
().
get
());
...
...
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
View file @
94e3a2e4
...
...
@@ -17,8 +17,8 @@ struct code_object_op
{
value
::
binary
code_object
;
std
::
string
symbol_name
;
std
::
size_
t
global
;
std
::
size_
t
local
;
in
t
global
;
in
t
local
;
std
::
vector
<
shape
>
expected_inputs
;
shape
output
;
kernel
k
{};
...
...
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp
View file @
94e3a2e4
...
...
@@ -15,9 +15,9 @@ namespace gpu {
std
::
vector
<
std
::
vector
<
char
>>
compile_hip_src
(
const
std
::
vector
<
src_file
>&
srcs
,
std
::
string
params
,
const
std
::
string
&
arch
);
std
::
string
enum_params
(
std
::
size_
t
count
,
std
::
string
param
);
std
::
string
enum_params
(
in
t
count
,
std
::
string
param
);
std
::
size_
t
compute_global
(
std
::
size_t
n
,
std
::
size_
t
local
=
1024
);
in
t
compute_global
(
int
n
,
in
t
local
=
1024
);
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp
View file @
94e3a2e4
...
...
@@ -10,8 +10,8 @@ namespace gpu {
struct
hip_compile_options
{
std
::
size_
t
global
;
std
::
size_
t
local
;
in
t
global
;
in
t
local
;
std
::
vector
<
shape
>
inputs
;
shape
output
;
std
::
string
kernel_name
=
"kernel"
;
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
94e3a2e4
...
...
@@ -29,13 +29,13 @@ struct hip_device
add_stream
();
}
hip_device
(
std
::
size_t
id
,
std
::
size_
t
n
)
:
device_id
(
id
)
hip_device
(
int
id
,
in
t
n
)
:
device_id
(
id
)
{
auto
status
=
hipGetDeviceProperties
(
&
device_props
,
device_id
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to allocate stream"
);
for
(
std
::
size_
t
i
=
0
;
i
<
n
;
i
++
)
for
(
in
t
i
=
0
;
i
<
n
;
i
++
)
add_stream
();
}
...
...
@@ -45,7 +45,7 @@ struct hip_device
stream
()
{}
stream
(
std
::
size_
t
device_number
)
:
id
(
device_number
)
{}
stream
(
in
t
device_number
)
:
id
(
device_number
)
{}
void
setup
()
const
{
set_device
(
id
);
}
...
...
@@ -124,7 +124,7 @@ struct hip_device
}
private:
std
::
size_
t
id
=
0
;
in
t
id
=
0
;
shared
<
hip_stream_ptr
>
s
=
nullptr
;
shared
<
miopen_handle
>
mihandle
=
nullptr
;
shared
<
rocblas_handle_ptr
>
rbhandle
=
nullptr
;
...
...
@@ -134,29 +134,29 @@ struct hip_device
stream
&
get_stream
()
{
return
streams
.
at
(
current_stream
);
}
stream
&
get_stream
(
std
::
size_
t
n
)
{
return
streams
.
at
(
n
);
}
stream
&
get_stream
(
in
t
n
)
{
return
streams
.
at
(
n
);
}
const
stream
&
get_stream
()
const
{
return
streams
.
at
(
current_stream
);
}
const
stream
&
get_stream
(
std
::
size_
t
n
)
const
{
return
streams
.
at
(
n
);
}
const
stream
&
get_stream
(
in
t
n
)
const
{
return
streams
.
at
(
n
);
}
void
set_stream
(
std
::
size_
t
n
)
{
current_stream
=
n
;
}
void
set_stream
(
in
t
n
)
{
current_stream
=
n
;
}
std
::
size_
t
nstreams
()
const
{
return
streams
.
size
();
}
in
t
nstreams
()
const
{
return
streams
.
size
();
}
std
::
size_
t
stream_id
()
const
{
return
current_stream
;
}
in
t
stream_id
()
const
{
return
current_stream
;
}
std
::
string
get_device_name
()
const
{
return
device_props
.
gcnArchName
;
}
std
::
size_
t
get_device_major
()
const
{
return
device_props
.
major
;
}
in
t
get_device_major
()
const
{
return
device_props
.
major
;
}
std
::
size_
t
get_device_minor
()
const
{
return
device_props
.
minor
;
}
in
t
get_device_minor
()
const
{
return
device_props
.
minor
;
}
std
::
size_
t
get_cu_count
()
const
{
return
device_props
.
multiProcessorCount
;
}
in
t
get_cu_count
()
const
{
return
device_props
.
multiProcessorCount
;
}
private:
std
::
size_
t
device_id
=
0
;
std
::
size_
t
current_stream
=
0
;
in
t
device_id
=
0
;
in
t
current_stream
=
0
;
std
::
vector
<
stream
>
streams
;
hipDeviceProp_t
device_props
;
...
...
@@ -166,7 +166,7 @@ struct hip_device
struct
context
{
context
(
std
::
size_
t
device_id
=
0
,
std
::
size_
t
n
=
value_of
(
MIGRAPHX_NSTREAMS
{},
1
))
context
(
in
t
device_id
=
0
,
in
t
n
=
value_of
(
MIGRAPHX_NSTREAMS
{},
1
))
:
current_device
(
std
::
make_shared
<
hip_device
>
(
device_id
,
n
))
{
}
...
...
@@ -184,23 +184,23 @@ struct context
}
hip_device
::
stream
&
get_stream
()
{
return
get_current_device
().
get_stream
();
}
hip_device
::
stream
&
get_stream
(
std
::
size_
t
n
)
{
return
get_current_device
().
get_stream
(
n
);
}
hip_device
::
stream
&
get_stream
(
in
t
n
)
{
return
get_current_device
().
get_stream
(
n
);
}
const
hip_device
::
stream
&
get_stream
()
const
{
return
get_current_device
().
get_stream
();
}
const
hip_device
::
stream
&
get_stream
(
std
::
size_
t
n
)
const
const
hip_device
::
stream
&
get_stream
(
in
t
n
)
const
{
return
get_current_device
().
get_stream
(
n
);
}
void
set_stream
(
std
::
size_
t
n
)
{
get_current_device
().
set_stream
(
n
);
}
void
set_stream
(
in
t
n
)
{
get_current_device
().
set_stream
(
n
);
}
void
create_events
(
std
::
size_
t
num_of_events
)
void
create_events
(
in
t
num_of_events
)
{
for
(
std
::
size_
t
i
=
events
.
size
();
i
<
num_of_events
+
1
;
++
i
)
for
(
in
t
i
=
events
.
size
();
i
<
num_of_events
+
1
;
++
i
)
events
.
emplace_back
(
create_event
());
}
hipEvent_t
get_event
(
std
::
size_
t
i
)
const
{
return
events
.
at
(
i
).
get
();
}
hipEvent_t
get_event
(
in
t
i
)
const
{
return
events
.
at
(
i
).
get
();
}
std
::
vector
<
argument
>
literals
{};
void
finish
()
const
{
get_stream
().
wait
();
}
...
...
@@ -226,11 +226,11 @@ struct context
void
from_value
(
const
value
&
v
)
{
auto
v_events
=
v
.
at
(
"events"
);
std
::
size_
t
n_events
=
v_events
.
without_key
().
to
<
std
::
size_
t
>
();
in
t
n_events
=
v_events
.
without_key
().
to
<
in
t
>
();
this
->
create_events
(
n_events
-
1
);
auto
v_streams
=
v
.
at
(
"streams"
);
std
::
size_
t
n_streams
=
v_streams
.
without_key
().
to
<
std
::
size_
t
>
();
in
t
n_streams
=
v_streams
.
without_key
().
to
<
in
t
>
();
this
->
current_device
=
std
::
make_shared
<
hip_device
>
(
0
,
n_streams
);
}
...
...
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
View file @
94e3a2e4
...
...
@@ -73,7 +73,7 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
{
auto
arg_shape
=
arg
.
get_shape
();
auto
batch_lens
=
arg_shape
.
lens
();
size_
t
batch_item_num
=
batch_lens
[
axis
];
in
t
batch_item_num
=
batch_lens
[
axis
];
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
arg_shape
.
type
(),
batch_lens
};
migraphx
::
shape
std_arg_shape
{
arg_shape
.
type
(),
arg_shape
.
lens
()};
...
...
@@ -82,8 +82,8 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
auto
*
output
=
device_cast
(
result
.
get
<
int64_t
>
().
data
());
using
type
=
device_type
<
std
::
remove_cv_t
<
typename
decltype
(
input
)
::
value_type
>>
;
// use one block for items in one batch.
const
size_
t
max_block_size
=
256
;
const
std
::
size_
t
block_size
=
compute_block_size
(
batch_item_num
,
max_block_size
);
const
in
t
max_block_size
=
256
;
const
in
t
block_size
=
compute_block_size
(
batch_item_num
,
max_block_size
);
gs_launch
(
stream
,
batch_shape
.
elements
()
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
...
...
src/targets/gpu/include/migraphx/gpu/device/concat.hpp
View file @
94e3a2e4
...
...
@@ -13,7 +13,7 @@ namespace device {
argument
concat
(
hipStream_t
stream
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
,
std
::
vector
<
std
::
size_
t
>
offsets
);
std
::
vector
<
in
t
>
offsets
);
}
// namespace device
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/gemm.hpp
View file @
94e3a2e4
...
...
@@ -89,13 +89,13 @@ struct rocblas_gemm
return
args
.
back
();
}
void
batch_not_transposed
(
const
std
::
vector
<
std
::
size_
t
>&
strides
)
const
void
batch_not_transposed
(
const
std
::
vector
<
in
t
>&
strides
)
const
{
if
(
strides
.
size
()
<=
2
)
return
;
auto
dim_0
=
strides
.
size
()
-
2
;
auto
matrix_size
=
std
::
max
(
strides
[
dim_0
],
strides
[
dim_0
+
1
]);
std
::
vector
<
std
::
size_
t
>
batch
(
strides
.
begin
(),
strides
.
begin
()
+
dim_0
);
std
::
vector
<
in
t
>
batch
(
strides
.
begin
(),
strides
.
begin
()
+
dim_0
);
if
(
std
::
all_of
(
batch
.
begin
(),
batch
.
end
(),
[
&
](
auto
i
)
{
return
(
i
<
matrix_size
);
}))
{
MIGRAPHX_THROW
(
"GPU_GEMM: matrix size and batch size {"
+
to_string_range
(
strides
)
+
...
...
src/targets/gpu/include/migraphx/gpu/hip.hpp
View file @
94e3a2e4
...
...
@@ -22,7 +22,7 @@ argument to_gpu(const argument& arg, bool host = false);
argument
from_gpu
(
const
argument
&
arg
);
void
set_device
(
std
::
size_
t
id
);
void
set_device
(
in
t
id
);
void
gpu_sync
();
void
gpu_sync
(
const
context
&
ctx
);
...
...
src/targets/gpu/include/migraphx/gpu/kernel.hpp
View file @
94e3a2e4
...
...
@@ -25,16 +25,16 @@ struct kernel
}
void
launch
(
hipStream_t
stream
,
std
::
size_
t
global
,
std
::
size_
t
local
,
in
t
global
,
in
t
local
,
const
std
::
vector
<
kernel_argument
>&
args
)
const
;
void
launch
(
hipStream_t
stream
,
std
::
size_
t
global
,
std
::
size_
t
local
,
in
t
global
,
in
t
local
,
std
::
vector
<
void
*>
args
)
const
;
auto
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_
t
local
)
const
auto
launch
(
hipStream_t
stream
,
int
global
,
in
t
local
)
const
{
return
[
=
](
auto
&&
...
xs
)
{
launch
(
stream
,
global
,
local
,
std
::
vector
<
kernel_argument
>
{
xs
...});
...
...
src/targets/gpu/include/migraphx/gpu/oper.hpp
View file @
94e3a2e4
...
...
@@ -16,7 +16,7 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
template
<
class
Derived
,
std
::
size_
t
N
>
template
<
class
Derived
,
in
t
N
>
struct
device_base
:
oper
<
Derived
>
{
template
<
class
Self
,
class
F
>
...
...
@@ -32,7 +32,7 @@ struct device_base : oper<Derived>
reduce_shapes
=
reduce_dims
(
inputs
);
}
argument
get_arg
(
const
std
::
vector
<
argument
>&
args
,
std
::
size_
t
i
)
const
argument
get_arg
(
const
std
::
vector
<
argument
>&
args
,
in
t
i
)
const
{
if
(
reduce_shapes
.
empty
())
return
args
[
i
];
...
...
Prev
1
…
3
4
5
6
7
8
9
10
11
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment