Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
6d1c23e9
Commit
6d1c23e9
authored
Jun 24, 2019
by
Shucai Xiao
Browse files
clang format
parent
b8782a5f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
42 deletions
+44
-42
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+15
-15
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+15
-14
src/targets/gpu/device/softmax.cpp
src/targets/gpu/device/softmax.cpp
+14
-13
No files found.
src/targets/cpu/lowering.cpp
View file @
6d1c23e9
...
@@ -549,7 +549,7 @@ struct cpu_softmax
...
@@ -549,7 +549,7 @@ struct cpu_softmax
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
auto
batch_lens
=
output_shape
.
lens
();
auto
batch_lens
=
output_shape
.
lens
();
size_t
n_dims
=
batch_lens
[
op
.
axis
];
size_t
n_dims
=
batch_lens
[
op
.
axis
];
batch_lens
[
op
.
axis
]
=
1
;
batch_lens
[
op
.
axis
]
=
1
;
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
...
@@ -561,31 +561,31 @@ struct cpu_softmax
...
@@ -561,31 +561,31 @@ struct cpu_softmax
par_for
(
batch_shape
.
elements
(),
[
&
](
auto
i
)
{
par_for
(
batch_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx
=
compute_batch_indices
(
i
,
batch_shape
);
auto
idx
=
compute_batch_indices
(
i
,
batch_shape
);
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
batch_max
[
i
]
=
std
::
max
(
batch_max
[
i
],
input
(
idx
.
begin
(),
idx
.
end
()));
batch_max
[
i
]
=
std
::
max
(
batch_max
[
i
],
input
(
idx
.
begin
(),
idx
.
end
()));
}
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
size_t
index
=
output_shape
.
index
(
idx
);
size_t
index
=
output_shape
.
index
(
idx
);
output
[
index
]
=
std
::
exp
(
input
[
index
]
-
batch_max
[
i
]);
output
[
index
]
=
std
::
exp
(
input
[
index
]
-
batch_max
[
i
]);
}
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
batch_sum
[
i
]
+=
output
(
idx
.
begin
(),
idx
.
end
());
batch_sum
[
i
]
+=
output
(
idx
.
begin
(),
idx
.
end
());
}
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
output
(
idx
.
begin
(),
idx
.
end
())
/=
batch_sum
[
i
];
output
(
idx
.
begin
(),
idx
.
end
())
/=
batch_sum
[
i
];
}
}
});
});
});
});
return
result
;
return
result
;
...
@@ -624,7 +624,7 @@ struct cpu_logsoftmax
...
@@ -624,7 +624,7 @@ struct cpu_logsoftmax
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
auto
batch_lens
=
output_shape
.
lens
();
auto
batch_lens
=
output_shape
.
lens
();
size_t
n_dims
=
batch_lens
[
op
.
axis
];
size_t
n_dims
=
batch_lens
[
op
.
axis
];
batch_lens
[
op
.
axis
]
=
1
;
batch_lens
[
op
.
axis
]
=
1
;
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
...
@@ -638,20 +638,20 @@ struct cpu_logsoftmax
...
@@ -638,20 +638,20 @@ struct cpu_logsoftmax
par_for
(
batch_shape
.
elements
(),
[
&
](
auto
i
)
{
par_for
(
batch_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx
=
compute_batch_indices
(
i
,
batch_shape
);
auto
idx
=
compute_batch_indices
(
i
,
batch_shape
);
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
batch_max
[
i
]
=
std
::
max
(
batch_max
[
i
],
input
(
idx
.
begin
(),
idx
.
end
()));
batch_max
[
i
]
=
std
::
max
(
batch_max
[
i
],
input
(
idx
.
begin
(),
idx
.
end
()));
}
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
size_t
index
=
output_shape
.
index
(
idx
);
size_t
index
=
output_shape
.
index
(
idx
);
output
[
index
]
=
input
[
index
]
-
batch_max
[
i
];
output
[
index
]
=
input
[
index
]
-
batch_max
[
i
];
}
}
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
batch_sum
[
i
]
+=
std
::
exp
(
output
(
idx
.
begin
(),
idx
.
end
()));
batch_sum
[
i
]
+=
std
::
exp
(
output
(
idx
.
begin
(),
idx
.
end
()));
...
@@ -659,7 +659,7 @@ struct cpu_logsoftmax
...
@@ -659,7 +659,7 @@ struct cpu_logsoftmax
batch_sum
[
i
]
=
std
::
log
(
batch_sum
[
i
]);
batch_sum
[
i
]
=
std
::
log
(
batch_sum
[
i
]);
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
for
(
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
{
idx
[
op
.
axis
]
=
j
;
idx
[
op
.
axis
]
=
j
;
output
(
idx
.
begin
(),
idx
.
end
())
-=
batch_sum
[
i
];
output
(
idx
.
begin
(),
idx
.
end
())
-=
batch_sum
[
i
];
...
...
src/targets/gpu/device/logsoftmax.cpp
View file @
6d1c23e9
...
@@ -17,10 +17,10 @@ argument logsoftmax(hipStream_t stream,
...
@@ -17,10 +17,10 @@ argument logsoftmax(hipStream_t stream,
int
axis
)
int
axis
)
{
{
auto
lens
=
output_shape
.
lens
();
auto
lens
=
output_shape
.
lens
();
auto
n_dims
=
lens
[
axis
];
auto
n_dims
=
lens
[
axis
];
auto
batch_lens
=
lens
;
auto
batch_lens
=
lens
;
batch_lens
[
axis
]
=
1
;
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
output_shape
.
type
(),
batch_lens
};
migraphx
::
shape
batch_shape
{
output_shape
.
type
(),
batch_lens
};
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
...
@@ -34,8 +34,8 @@ argument logsoftmax(hipStream_t stream,
...
@@ -34,8 +34,8 @@ argument logsoftmax(hipStream_t stream,
// opt 1, load all data to lds then use the same approach as
// opt 1, load all data to lds then use the same approach as
// the current optimization
// the current optimization
const
size_t
max_block_size
=
1024
;
const
size_t
max_block_size
=
1024
;
size_t
block_size
=
1
;
size_t
block_size
=
1
;
while
(
block_size
<
max_block_size
and
block_size
<
n_dim
)
while
(
block_size
<
max_block_size
and
block_size
<
n_dim
)
{
{
block_size
*=
2
;
block_size
*=
2
;
}
}
...
@@ -53,14 +53,14 @@ argument logsoftmax(hipStream_t stream,
...
@@ -53,14 +53,14 @@ argument logsoftmax(hipStream_t stream,
auto
data_idx
=
batch_idx
;
auto
data_idx
=
batch_idx
;
// load data to lds and compute the batch max
// load data to lds and compute the batch max
size_t
item_num
=
n_dims
;
size_t
item_num
=
n_dims
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
lds_data
[
block_size
]
=
input_ptr
[
0
];
lds_data
[
block_size
]
=
input_ptr
[
0
];
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
{
data_idx
[
axis
]
=
i
;
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
}
}
__syncthreads
();
__syncthreads
();
...
@@ -97,13 +97,14 @@ argument logsoftmax(hipStream_t stream,
...
@@ -97,13 +97,14 @@ argument logsoftmax(hipStream_t stream,
item_num
=
n_dims
;
item_num
=
n_dims
;
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
{
data_idx
[
axis
]
=
i
;
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
}
}
__syncthreads
();
__syncthreads
();
auto
size
=
(
item_num
>
block_size
)
?
block_size
:
item_num
;
auto
size
=
(
item_num
>
block_size
)
?
block_size
:
item_num
;
...
...
src/targets/gpu/device/softmax.cpp
View file @
6d1c23e9
...
@@ -32,8 +32,8 @@ argument softmax(hipStream_t stream,
...
@@ -32,8 +32,8 @@ argument softmax(hipStream_t stream,
// use one block for items in one batch.
// use one block for items in one batch.
const
size_t
max_block_size
=
1024
;
const
size_t
max_block_size
=
1024
;
size_t
block_size
=
1
;
size_t
block_size
=
1
;
while
(
block_size
<
max_block_size
and
block_size
<
n_dims
)
while
(
block_size
<
max_block_size
and
block_size
<
n_dims
)
{
{
block_size
*=
2
;
block_size
*=
2
;
}
}
...
@@ -50,16 +50,16 @@ argument softmax(hipStream_t stream,
...
@@ -50,16 +50,16 @@ argument softmax(hipStream_t stream,
auto
batch_idx
=
desc_batch
.
multi
(
blk_idx
);
auto
batch_idx
=
desc_batch
.
multi
(
blk_idx
);
auto
data_idx
=
batch_idx
;
auto
data_idx
=
batch_idx
;
// load data to lds and compute the batch max
// load data to lds and compute the batch max
size_t
item_num
=
n_dims
;
size_t
item_num
=
n_dims
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
size_t
thread_num
=
(
n_dims
+
block_size
-
1
)
/
block_size
*
block_size
;
lds_data
[
block_size
]
=
input_ptr
[
0
];
lds_data
[
block_size
]
=
input_ptr
[
0
];
lds_data
[
block_size
+
1
]
=
0
;
lds_data
[
block_size
+
1
]
=
0
;
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
{
data_idx
[
axis
]
=
i
;
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)];
}
}
__syncthreads
();
__syncthreads
();
...
@@ -92,14 +92,15 @@ argument softmax(hipStream_t stream,
...
@@ -92,14 +92,15 @@ argument softmax(hipStream_t stream,
item_num
-=
block_size
;
item_num
-=
block_size
;
}
}
item_num
=
n_dims
;
item_num
=
n_dims
;
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
for
(
size_t
i
=
thr_idx
;
i
<
thread_num
;
i
+=
block_size
)
{
{
if
(
i
<
n_dims
)
if
(
i
<
n_dims
)
{
{
data_idx
[
axis
]
=
i
;
data_idx
[
axis
]
=
i
;
lds_data
[
thr_idx
]
=
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
input_ptr
[
desc_data
.
linear
(
data_idx
)]
-
lds_data
[
block_size
];
lds_data
[
thr_idx
]
=
::
exp
(
to_hip_type
(
lds_data
[
thr_idx
]));
}
}
__syncthreads
();
__syncthreads
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment