Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
5f37917f
"src/include/Sequence.hpp" did not exist on "766b0a9eafe29a5d2a75c350345e54165ceaf405"
Commit
5f37917f
authored
Feb 28, 2022
by
Shucai Xiao
Browse files
clang format
parent
f50bcff2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
28 additions
and
27 deletions
+28
-27
src/targets/gpu/device/add.cpp
src/targets/gpu/device/add.cpp
+9
-8
src/targets/gpu/device/contiguous.cpp
src/targets/gpu/device/contiguous.cpp
+8
-8
src/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp
...ts/gpu/device/include/migraphx/gpu/device/multi_index.hpp
+2
-2
src/targets/gpu/device/mul.cpp
src/targets/gpu/device/mul.cpp
+9
-9
No files found.
src/targets/gpu/device/add.cpp
View file @
5f37917f
...
@@ -11,27 +11,28 @@ namespace device {
...
@@ -11,27 +11,28 @@ namespace device {
/// Adds two half-precision buffers element-wise; each thread handles the single
/// element at its flat global index.
///
/// `b` is indexed modulo 768, so it is read as a 768-element vector that repeats
/// along the flattened index.
///
/// @param a  First input, read at the thread's flat index.
/// @param b  Second input, read at (flat index % 768).
/// @param r  Output, written at the thread's flat index.
/// @param n  Number of elements to process; threads whose index is not below
///           `n` do nothing.
__global__ void add_kernel(__half* a, __half* b, __half* r, int n)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if(gid >= n)
        return;

    const int b_idx = gid % 768;
    r[gid]          = a[gid] + b[b_idx];
}
/// Element-wise addition of `arg1` and `arg2` into `result`, submitted to `stream`.
///
/// Fast path: when the shape of `arg2` reports an element space of exactly 768
/// and a half-precision type, the dedicated `add_kernel` is launched, which reads
/// `arg2` as a 768-element vector repeated along the flattened index. Every other
/// case is forwarded to the generic `nary` helper.
///
/// @param stream  HIP stream the work is submitted to (both paths).
/// @param result  Destination argument; its data is written by the kernel.
/// @param arg1    Left operand.
/// @param arg2    Right operand; its shape selects the fast path.
void add(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    auto s2 = arg2.get_shape();
    if(s2.element_space() == 768 and s2.type() == shape::half_type)
    {
        // NOTE(review): only arg2's type and element space are checked here. The
        // casts below presume arg1 and result also hold half data, and the kernel's
        // `tid % 768` indexing presumes arg2 repeats along the innermost dimension
        // -- confirm against callers.
        auto elem_num  = s2.elements();
        int block_size = 1024;
        int block_num  = (elem_num + block_size - 1) / block_size;
        // Launch on the caller's stream (4th launch parameter; 0 bytes of dynamic
        // shared memory) so the kernel is queued on the same stream as the `nary`
        // fallback. The previous launch omitted the stream and therefore went to
        // the default stream.
        add_kernel<<<block_num, block_size, 0, stream>>>(reinterpret_cast<__half*>(arg1.data()),
                                                         reinterpret_cast<__half*>(arg2.data()),
                                                         reinterpret_cast<__half*>(result.data()),
                                                         elem_num);
    }
    else
    {
        nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x + y; });
    }
}
...
...
src/targets/gpu/device/contiguous.cpp
View file @
5f37917f
...
@@ -14,7 +14,7 @@ void contiguous_nonstandard(hipStream_t stream, const argument& result, const ar
...
@@ -14,7 +14,7 @@ void contiguous_nonstandard(hipStream_t stream, const argument& result, const ar
visit_all
(
result
,
arg
)([
&
](
auto
output_v
,
auto
input_v
)
{
visit_all
(
result
,
arg
)([
&
](
auto
output_v
,
auto
input_v
)
{
hip_visit_views
(
output_v
,
input_v
,
s
)([
&
](
auto
output
,
auto
input
,
auto
standard_shape
)
{
hip_visit_views
(
output_v
,
input_v
,
s
)([
&
](
auto
output
,
auto
input
,
auto
standard_shape
)
{
gs_launch
(
stream
,
s
.
elements
())([
=
](
auto
i
)
__device__
{
gs_launch
(
stream
,
s
.
elements
())([
=
](
auto
i
)
__device__
{
auto
idx
=
standard_shape
.
multi
(
i
);
auto
idx
=
standard_shape
.
multi
(
i
);
output
[
idx
]
=
input
[
idx
];
output
[
idx
]
=
input
[
idx
];
});
});
// mi_gs_launch(stream,
// mi_gs_launch(stream,
...
@@ -34,8 +34,8 @@ void contiguous_packed(hipStream_t stream, const argument& result, const argumen
...
@@ -34,8 +34,8 @@ void contiguous_packed(hipStream_t stream, const argument& result, const argumen
// auto* output = device_cast(output_v.data());
// auto* output = device_cast(output_v.data());
// const __half2* input2 = reinterpret_cast<__half2*>(input_v.data());
// const __half2* input2 = reinterpret_cast<__half2*>(input_v.data());
// __half2* output2 = reinterpret_cast<__half2*>(output_v.data());
// __half2* output2 = reinterpret_cast<__half2*>(output_v.data());
// gs_launch(stream, nelements / 2)([=](auto i) __device__ {
// gs_launch(stream, nelements / 2)([=](auto i) __device__ {
// output2[i] = input2[i];
// output2[i] = input2[i];
// if (i == 0 and (nelements % 2) == 1)
// if (i == 0 and (nelements % 2) == 1)
// {
// {
// output[nelements - 1] = input[nelements - 1];
// output[nelements - 1] = input[nelements - 1];
...
@@ -45,11 +45,11 @@ void contiguous_packed(hipStream_t stream, const argument& result, const argumen
...
@@ -45,11 +45,11 @@ void contiguous_packed(hipStream_t stream, const argument& result, const argumen
// }
// }
// else
// else
// {
// {
visit_all
(
result
,
arg
)([
&
](
auto
output_v
,
auto
input_v
)
{
visit_all
(
result
,
arg
)([
&
](
auto
output_v
,
auto
input_v
)
{
const
auto
*
input
=
device_cast
(
input_v
.
data
());
const
auto
*
input
=
device_cast
(
input_v
.
data
());
auto
*
output
=
device_cast
(
output_v
.
data
());
auto
*
output
=
device_cast
(
output_v
.
data
());
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
__device__
{
output
[
i
]
=
input
[
i
];
});
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
__device__
{
output
[
i
]
=
input
[
i
];
});
});
});
// }
// }
}
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp
View file @
5f37917f
...
@@ -57,8 +57,8 @@ inline auto mi_nglobal(const hip_shape<N>& s, index_int nlocal)
...
@@ -57,8 +57,8 @@ inline auto mi_nglobal(const hip_shape<N>& s, index_int nlocal)
{
{
assert
(
s
.
standard
);
assert
(
s
.
standard
);
assert
(
s
.
elements
()
>
0
);
assert
(
s
.
elements
()
>
0
);
index_int
n
=
s
.
elements
();
index_int
n
=
s
.
elements
();
index_int
groups
=
(
n
+
nlocal
-
1
)
/
nlocal
;
index_int
groups
=
(
n
+
nlocal
-
1
)
/
nlocal
;
// change the max group num to 1 Million
// change the max group num to 1 Million
index_int
nglobal
=
std
::
min
<
index_int
>
((
1
<<
20
),
groups
)
*
nlocal
;
index_int
nglobal
=
std
::
min
<
index_int
>
((
1
<<
20
),
groups
)
*
nlocal
;
...
...
src/targets/gpu/device/mul.cpp
View file @
5f37917f
...
@@ -11,28 +11,28 @@ namespace device {
...
@@ -11,28 +11,28 @@ namespace device {
/// Multiplies two half-precision buffers element-wise; each thread handles the
/// single element at its flat global index.
///
/// `b` is indexed modulo 768, so it is read as a 768-element vector that repeats
/// along the flattened index.
///
/// @param a  First input, read at the thread's flat index.
/// @param b  Second input, read at (flat index % 768).
/// @param r  Output, written at the thread's flat index.
/// @param n  Number of elements to process; threads whose index is not below
///           `n` do nothing.
__global__ void mul_kernel(__half* a, __half* b, __half* r, int n)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if(gid >= n)
        return;

    const int b_idx = gid % 768;
    r[gid]          = a[gid] * b[b_idx];
}
/// Element-wise multiplication of `arg1` and `arg2` into `result`, submitted to
/// `stream`.
///
/// Fast path: when the shape of `arg2` reports an element space of exactly 768
/// and a half-precision type, the dedicated `mul_kernel` is launched, which reads
/// `arg2` as a 768-element vector repeated along the flattened index. Every other
/// case is forwarded to the generic `nary` helper.
///
/// @param stream  HIP stream the work is submitted to (both paths).
/// @param result  Destination argument; its data is written by the kernel.
/// @param arg1    Left operand.
/// @param arg2    Right operand; its shape selects the fast path.
void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    auto s2 = arg2.get_shape();
    if(s2.element_space() == 768 and s2.type() == shape::half_type)
    {
        // NOTE(review): only arg2's type and element space are checked here. The
        // casts below presume arg1 and result also hold half data, and the kernel's
        // `tid % 768` indexing presumes arg2 repeats along the innermost dimension
        // -- confirm against callers.
        auto elem_num  = s2.elements();
        int block_size = 1024;
        int block_num  = (elem_num + block_size - 1) / block_size;
        // Launch on the caller's stream (4th launch parameter; 0 bytes of dynamic
        // shared memory) so the kernel is queued on the same stream as the `nary`
        // fallback. The previous launch omitted the stream and therefore went to
        // the default stream.
        mul_kernel<<<block_num, block_size, 0, stream>>>(reinterpret_cast<__half*>(arg1.data()),
                                                         reinterpret_cast<__half*>(arg2.data()),
                                                         reinterpret_cast<__half*>(result.data()),
                                                         elem_num);
    }
    else
    {
        nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x * y; });
    }
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment