Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
6596ee39
Commit
6596ee39
authored
Jun 26, 2019
by
Shucai Xiao
Browse files
merge changes from branch softmax/logsoftmax optimization
parents
613772dd
38369866
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
59 additions
and
70 deletions
+59
-70
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+6
-2
src/targets/gpu/device/softmax.cpp
src/targets/gpu/device/softmax.cpp
+4
-3
src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
+25
-65
src/tf/tf.cpp
src/tf/tf.cpp
+1
-0
test/tf/sub_test.pb
test/tf/sub_test.pb
+12
-0
test/tf/tf_test.cpp
test/tf/tf_test.cpp
+11
-0
No files found.
src/targets/gpu/device/logsoftmax.cpp
View file @
6596ee39
...
...
@@ -53,7 +53,9 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
__syncthreads
();
auto
item_num
=
(
remaining_item_num
>
block_size
)
?
block_size
:
remaining_item_num
;
reduce_max
(
lds_data
,
block_size
,
thr_idx
,
item_num
,
max_block_size
);
// reduce_max(lds_data, block_size, thr_idx, item_num, max_block_size);
block_reduce
<
type
,
max_op
<
type
>>
(
lds_data
,
max_op
<
type
>
{},
block_size
,
thr_idx
,
item_num
,
max_block_size
);
remaining_item_num
-=
block_size
;
}
...
...
@@ -75,7 +77,9 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
__syncthreads
();
auto
item_num
=
(
remaining_item_num
>
block_size
)
?
block_size
:
remaining_item_num
;
reduce_sum
(
lds_data
,
block_size
,
thr_idx
,
item_num
,
max_block_size
);
// reduce_sum(lds_data, block_size, thr_idx, item_num, max_block_size);
block_reduce
<
type
,
sum_op
<
type
>>
(
lds_data
,
sum_op
<
type
>
{},
block_size
,
thr_idx
,
item_num
,
max_block_size
);
remaining_item_num
-=
block_size
;
}
...
...
src/targets/gpu/device/softmax.cpp
View file @
6596ee39
...
...
@@ -54,8 +54,8 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
__syncthreads
();
auto
item_num
=
(
remaining_item_num
>
block_size
)
?
block_size
:
remaining_item_num
;
reduce_max
(
lds_data
,
block_size
,
thr_idx
,
item_num
,
max_block_size
);
block_reduce
<
type
,
max_op
<
type
>>
(
lds_data
,
max_op
<
type
>
{},
block_size
,
thr_idx
,
item_num
,
max_block_size
);
remaining_item_num
-=
block_size
;
}
...
...
@@ -76,7 +76,8 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
__syncthreads
();
auto
item_num
=
(
remaining_item_num
>
block_size
)
?
block_size
:
remaining_item_num
;
reduce_sum
(
lds_data
,
block_size
,
thr_idx
,
item_num
,
max_block_size
);
block_reduce
<
type
,
sum_op
<
type
>>
(
lds_data
,
sum_op
<
type
>
{},
block_size
,
thr_idx
,
item_num
,
max_block_size
);
remaining_item_num
-=
block_size
;
}
...
...
src/targets/gpu/include/migraphx/gpu/device/reduce_opers.hpp
View file @
6596ee39
...
...
@@ -11,42 +11,30 @@ namespace gpu {
namespace
device
{
template
<
class
T
>
inline
__device__
void
reduce_max
(
T
*
data_ptr
,
std
::
size_t
block_size
,
std
::
size_t
thr_idx
,
std
::
size_t
item_num
,
std
::
size_t
max_index
)
struct
max_op
{
while
(
true
)
{
auto
stride
=
(
item_num
+
1
)
/
2
;
auto
size
=
item_num
/
2
;
for
(
std
::
size_t
i
=
thr_idx
;
i
<
size
;
i
+=
block_size
)
{
data_ptr
[
i
]
=
::
max
(
to_hip_type
(
data_ptr
[
i
]),
to_hip_type
(
data_ptr
[
i
+
stride
]));
}
__syncthreads
();
item_num
=
stride
;
T
operator
()(
T
x
,
T
y
)
{
return
(
x
>
y
)
?
x
:
y
;
}
};
if
(
item_num
==
1
)
break
;
}
if
(
thr_idx
==
0
)
{
data_ptr
[
max_index
]
=
(
data_ptr
[
0
]
<
data_ptr
[
max_index
])
?
data_ptr
[
max_index
]
:
data_ptr
[
0
];
}
__syncthreads
();
}
/// Binary functor that yields the smaller of its two arguments.
///
/// Intended to be passed as the `Op` argument of a reduction helper that
/// combines two values of type `T` into one.
///
/// NOTE(review): `operator()` carries no `__device__` qualifier although it is
/// invoked from `__device__` code elsewhere in this header -- confirm the
/// toolchain in use accepts that.
template <class T>
struct min_op
{
    /// @param x first operand
    /// @param y second operand
    /// @return `x` when `x < y`, otherwise `y` (so `y` is returned on ties).
    ///
    /// Declared `const` because the functor is stateless; this lets it be
    /// called through const objects and const references.
    T operator()(T x, T y) const { return (x < y) ? x : y; }
};
template
<
class
T
>
inline
__device__
void
reduce_min
(
T
*
data_ptr
,
std
::
size_t
block_size
,
std
::
size_t
thr_idx
,
std
::
size_t
item_num
,
std
::
size_t
min_index
)
struct
sum_op
{
T
operator
()(
T
x
,
T
y
)
{
return
x
+
y
;
}
};
template
<
class
T
,
class
Op
>
inline
__device__
void
block_reduce
(
T
*
data_ptr
,
Op
op
,
std
::
size_t
block_size
,
std
::
size_t
thr_idx
,
std
::
size_t
item_num
,
std
::
size_t
max_index
)
{
while
(
true
)
{
...
...
@@ -54,7 +42,8 @@ inline __device__ void reduce_min(T* data_ptr,
auto
size
=
item_num
/
2
;
for
(
std
::
size_t
i
=
thr_idx
;
i
<
size
;
i
+=
block_size
)
{
data_ptr
[
i
]
=
::
min
(
to_hip_type
(
data_ptr
[
i
]),
to_hip_type
(
data_ptr
[
i
+
stride
]));
// data_ptr[i] = ::max(to_hip_type(data_ptr[i]), to_hip_type(data_ptr[i + stride]));
data_ptr
[
i
]
=
op
(
data_ptr
[
i
],
data_ptr
[
i
+
stride
]);
}
__syncthreads
();
item_num
=
stride
;
...
...
@@ -65,8 +54,9 @@ inline __device__ void reduce_min(T* data_ptr,
if
(
thr_idx
==
0
)
{
data_ptr
[
min_index
]
=
(
data_ptr
[
0
]
>
data_ptr
[
min_index
])
?
data_ptr
[
min_index
]
:
data_ptr
[
0
];
// data_ptr[max_index] =
// (data_ptr[0] < data_ptr[max_index]) ? data_ptr[max_index] : data_ptr[0];
data_ptr
[
max_index
]
=
op
(
data_ptr
[
max_index
],
data_ptr
[
0
]);
}
__syncthreads
();
...
...
@@ -150,36 +140,6 @@ inline __device__ void reduce_argmin(T* data_ptr,
__syncthreads
();
}
/// Sums the first `item_num` elements of `data_ptr` in place and accumulates
/// the total into `data_ptr[sum_index]`.
///
/// Each pass folds the upper half of the active range onto the lower half
/// (`data_ptr[i] += data_ptr[i + stride]`) and then halves the range, rounding
/// up, until a single element remains in `data_ptr[0]`. Every pass is followed
/// by `__syncthreads()`, so all threads calling this function must do so with
/// the same `item_num`.
///
/// @param data_ptr   buffer holding the values to reduce; it is overwritten.
/// @param block_size step used to distribute elements across threads.
/// @param thr_idx    index of the calling thread; thread 0 writes the result.
/// @param item_num   number of leading elements of `data_ptr` to sum.
/// @param sum_index  slot in `data_ptr` that the total is added to.
template <class T>
inline __device__ void reduce_sum(T* data_ptr,
                                  std::size_t block_size,
                                  std::size_t thr_idx,
                                  std::size_t item_num,
                                  std::size_t sum_index)
{
    // With zero items the loop below would never reach its `item_num == 1`
    // exit condition (0 halves to 0 forever), and data_ptr[0] would not hold
    // a valid partial sum anyway. There is nothing to accumulate, so leave.
    if(item_num == 0)
    {
        return;
    }

    while(true)
    {
        // stride rounds up so that an odd middle element is carried over
        // untouched to the next pass; size rounds down so it is never paired.
        const auto stride = (item_num + 1) / 2;
        const auto size   = item_num / 2;
        for(std::size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] += data_ptr[i + stride];
        }

        // All partial sums of this pass must be visible before the next one.
        __syncthreads();

        item_num = stride;
        if(item_num == 1)
        {
            break;
        }
    }

    // A single thread folds the block total into the running accumulator.
    if(thr_idx == 0)
    {
        data_ptr[sum_index] += data_ptr[0];
    }

    __syncthreads();
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/tf/tf.cpp
View file @
6596ee39
...
...
@@ -114,6 +114,7 @@ struct tf_parser
add_binary_op
(
"Add"
,
op
::
add
{});
add_binary_op
(
"Mul"
,
op
::
mul
{});
add_binary_op
(
"Sub"
,
op
::
sub
{});
add_mem_op
(
"AvgPool"
,
&
tf_parser
::
parse_pooling
);
add_mem_op
(
"BiasAdd"
,
&
tf_parser
::
parse_biasadd
);
...
...
test/tf/sub_test.pb
0 → 100644
View file @
6596ee39
:
0Placeholder*
shape:*
dtype0
:
1Placeholder*
shape:*
dtype0
sub1Sub01*
T0"
\ No newline at end of file
test/tf/tf_test.cpp
View file @
6596ee39
...
...
@@ -359,4 +359,15 @@ TEST_CASE(stridedslice_test)
EXPECT
(
p
==
prog
);
}
// Builds the expected program for an element-wise subtraction of two
// float parameters named "0" and "1" (each of shape {1, 2, 2, 3}) and checks
// that parsing sub_test.pb produces an equal program.
TEST_CASE(sub_test)
{
    // Both inputs share the same shape, so describe it once.
    const migraphx::shape input_shape{migraphx::shape::float_type, {1, 2, 2, 3}};

    migraphx::program p;
    auto minuend    = p.add_parameter("0", input_shape);
    auto subtrahend = p.add_parameter("1", input_shape);
    p.add_instruction(migraphx::op::sub{}, minuend, subtrahend);

    auto prog = migraphx::parse_tf("sub_test.pb", false);

    EXPECT(p == prog);
}
// Test-binary entry point: hands the command-line arguments to the test
// runner and then exits with status 0.
int main(int argc, const char* argv[])
{
    test::run(argc, argv);
    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment