Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
6d1c23e9
Commit
6d1c23e9
authored
Jun 24, 2019
by
Shucai Xiao
Browse files
clang format
parent
b8782a5f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
42 deletions
+44
-42
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+15
-15
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+15
-14
src/targets/gpu/device/softmax.cpp
src/targets/gpu/device/softmax.cpp
+14
-13
No files found.
src/targets/cpu/lowering.cpp
View file @
6d1c23e9
...
...
@@ -549,7 +549,7 @@ struct cpu_softmax
{
argument
result
{
output_shape
};
auto
batch_lens
=
output_shape
.
lens
();
size_t
n_dims
=
batch_lens
[
op
.
axis
];
size_t
n_dims
=
batch_lens
[
op
.
axis
];
batch_lens
[
op
.
axis
]
=
1
;
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
...
...
@@ -561,31 +561,31 @@ struct cpu_softmax
par_for
(
batch_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx
=
compute_batch_indices
(
i
,
batch_shape
);
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
batch_max
[
i
]
=
std
::
max
(
batch_max
[
i
],
input
(
idx
.
begin
(),
idx
.
end
()));
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
size_t
index
=
output_shape
.
index
(
idx
);
idx
[
op
.
axis
]
=
j
;
size_t
index
=
output_shape
.
index
(
idx
);
output
[
index
]
=
std
::
exp
(
input
[
index
]
-
batch_max
[
i
]);
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
batch_sum
[
i
]
+=
output
(
idx
.
begin
(),
idx
.
end
());
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
output
(
idx
.
begin
(),
idx
.
end
())
/=
batch_sum
[
i
];
}
});
});
});
return
result
;
...
...
@@ -624,7 +624,7 @@ struct cpu_logsoftmax
{
argument
result
{
output_shape
};
auto
batch_lens
=
output_shape
.
lens
();
size_t
n_dims
=
batch_lens
[
op
.
axis
];
size_t
n_dims
=
batch_lens
[
op
.
axis
];
batch_lens
[
op
.
axis
]
=
1
;
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
...
...
@@ -638,20 +638,20 @@ struct cpu_logsoftmax
par_for
(
batch_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx
=
compute_batch_indices
(
i
,
batch_shape
);
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
batch_max
[
i
]
=
std
::
max
(
batch_max
[
i
],
input
(
idx
.
begin
(),
idx
.
end
()));
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
size_t
index
=
output_shape
.
index
(
idx
);
idx
[
op
.
axis
]
=
j
;
size_t
index
=
output_shape
.
index
(
idx
);
output
[
index
]
=
input
[
index
]
-
batch_max
[
i
];
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
batch_sum
[
i
]
+=
std
::
exp
(
output
(
idx
.
begin
(),
idx
.
end
()));
...
...
@@ -659,7 +659,7 @@ struct cpu_logsoftmax
batch_sum
[
i
]
=
std
::
log
(
batch_sum
[
i
]);
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
op
.
axis
]
=
j
;
output
(
idx
.
begin
(),
idx
.
end
())
-=
batch_sum
[
i
];
...
...
src/targets/gpu/device/logsoftmax.cpp
View file @
6d1c23e9
...
...
@@ -17,10 +17,10 @@ argument logsoftmax(hipStream_t stream,
int
axis
)
{
auto
lens
=
output_shape
.
lens
();
auto
n_dims
=
lens
[
axis
];
auto
batch_lens
=
lens
;
batch_lens
[
axis
]
=
1
;
auto
lens
=
output_shape
.
lens
();
auto
n_dims
=
lens
[
axis
];
auto
batch_lens
=
lens
;
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
output_shape
.
type
(),
batch_lens
};
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
...
...
@@ -34,8 +34,8 @@ argument logsoftmax(hipStream_t stream,
// opt 1, load all data to lds then use the same approach as
// the current optimization
const
size_t
max_block_size
=
1024
;
size_t
block_size
=
1
;
while
(
block_size
<
max_block_size
and
block_size
<
n_dim
)
size_t
block_size
=
1
;
while
(
block_size
<
max_block_size
and
block_size
<
n_dim
)
{
block_size
*=
2
;
}
...
...
@@ -53,14 +53,14 @@ argument logsoftmax(hipStream_t stream,
auto
data_idx
=
batch_idx
;
// load data to lds and compute the batch max
size_t
item_num
=
n_dims
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
lds_data
[
block_size
]
=
input_ptr
[
0
];
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
}
__syncthreads
();
...
...
@@ -97,13 +97,14 @@ argument logsoftmax(hipStream_t stream,
item_num
=
n_dims
;
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
}
__syncthreads
();
auto
size
=
(
item_num
>
block_size
)
?
block_size
:
item_num
;
...
...
src/targets/gpu/device/softmax.cpp
View file @
6d1c23e9
...
...
@@ -32,8 +32,8 @@ argument softmax(hipStream_t stream,
// use one block for items in one batch.
const
size_t
max_block_size
=
1024
;
size_t
block_size
=
1
;
while
(
block_size
<
max_block_size
and
block_size
<
n_dims
)
size_t
block_size
=
1
;
while
(
block_size
<
max_block_size
and
block_size
<
n_dims
)
{
block_size
*=
2
;
}
...
...
@@ -50,16 +50,16 @@ argument softmax(hipStream_t stream,
auto
batch_idx
=
desc_batch
.
multi
(
blk_idx
);
auto
data_idx
=
batch_idx
;
// load data to lds and compute the batch max
size_t
item_num
=
n_dims
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
lds_data
[
block_size
]
=
input_ptr
[
0
];
lds_data
[
block_size
+
1
]
=
0
;
size_t
item_num
=
n_dims
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
lds_data
[
block_size
]
=
input_ptr
[
0
];
lds_data
[
block_size
+
1
]
=
0
;
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
}
__syncthreads
();
...
...
@@ -92,14 +92,15 @@ argument softmax(hipStream_t stream,
item_num
-=
block_size
;
}
item_num
=
n_dims
;
item_num
=
n_dims
;
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
}
__syncthreads
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment