gaoqiong / MIGraphX
Commit ed9ff879
authored Aug 23, 2018 by Paul

Move kernel launch to a separate header

parent 0bedc5e8
Showing 3 changed files with 62 additions and 40 deletions (+62 −40)
src/targets/gpu/CMakeLists.txt                                  +1   −1
src/targets/gpu/device/contiguous.cpp                           +6   −39
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp    +55  −0
src/targets/gpu/CMakeLists.txt
...
@@ -16,7 +16,7 @@ add_library(migraph_device
 rocm_clang_tidy_check(migraph_device)
 target_link_libraries(migraph_device migraph hip::device)
 target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
-target_include_directories(migraph_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device>)
+target_include_directories(migraph_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)

 add_library(migraph_gpu
   eliminate_allocation.cpp
...
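The only CMake change points migraph_device's private include path at device/include instead of device, so sources under src/targets/gpu/device can pick up the relocated launch helpers by their header path, as contiguous.cpp does below. A minimal sketch of such a translation unit, assuming a hypothetical fill operation (the file name and function are illustrative, not part of this commit):

// Hypothetical device/fill.cpp -- illustrative only, not part of this commit.
#include <cstddef>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

// Fill a buffer with a constant, calling launch() from the new header in the
// same way contiguous.cpp did before this refactor.
void fill(float* data, float value, std::size_t n)
{
    std::size_t nlocal  = 512;
    std::size_t nglobal = 512 * nlocal;
    launch(nglobal, nlocal)([=](auto idx) {
        // Grid-stride loop over the n elements.
        for(std::size_t i = idx.global; i < n; i += nglobal)
            data[i] = value;
    });
}

} // namespace device
} // namespace gpu
} // namespace migraph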
src/targets/gpu/device/contiguous.cpp
-#include <hip/hip_runtime.h>
 #include <migraph/gpu/device/contiguous.hpp>
+#include <migraph/gpu/device/launch.hpp>

 namespace migraph {
 namespace gpu {
 namespace device {

-struct index
-{
-    std::size_t global;
-    std::size_t local;
-    std::size_t group;
-};
-
-template <class F>
-__global__ void launcher(F f)
-{
-    index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
-    f(idx);
-}
-
-auto launch(std::size_t global, std::size_t local)
-{
-    return [=](auto f) {
-        assert(local > 0);
-        assert(global > 0);
-        using f_type = decltype(f);
-        dim3 nblocks(global / local);
-        dim3 nthreads(local);
-        hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, nullptr, f);
-    };
-}
-
 template <class F>
 void visit_tensor_size(std::size_t n, F f)
 {
...
@@ -86,7 +60,7 @@ struct hip_tensor_descriptor
         for(size_t i = 0; i < NDim; i++)
             strides[i] = strides_ext[i];
     }
-    __device__ __host__ hip_index<NDim> multi(size_t idx)
+    __device__ __host__ hip_index<NDim> multi(size_t idx) const
     {
         hip_index<NDim> result{};
         size_t tidx = idx;
...
@@ -97,7 +71,7 @@ struct hip_tensor_descriptor
         }
         return result;
     }
-    __device__ __host__ size_t linear(hip_index<NDim> s)
+    __device__ __host__ size_t linear(hip_index<NDim> s) const
     {
         size_t idx = 0;
         for(size_t i = 0; i < NDim; i++)
...
@@ -117,16 +91,9 @@ void contiguous(shape output_shape, argument arg, argument result)
             hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
             auto* a  = input.data();
             auto* at = output.data();
-            auto nelements = s.elements();
-
-            std::size_t nlocal  = 512;
-            std::size_t nglobal = 512 * nlocal;
-            launch(nglobal, nlocal)([=](auto idx) mutable {
-                for(size_t i = idx.global; i < nelements; i += nglobal)
-                {
+            gs_launch(s.elements())([=](auto i) {
                 size_t lidx = a_desc.linear(at_desc.multi(i));
                 at[i]       = a[lidx];
-                }
             });
         });
     });
...
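A note on the two const qualifiers added to multi() and linear(): the old code passed a mutable lambda to launch(), while the lambda handed to gs_launch() is not mutable, so its by-value captures (including the tensor descriptors) are const inside the body and only const member functions can be called on them. A minimal standalone illustration of that language rule, using a stand-in type rather than the real hip_tensor_descriptor:

#include <cstddef>

// Stand-in for hip_tensor_descriptor: linear() must be const so that it can be
// called on a by-value capture inside a non-mutable lambda.
struct toy_descriptor
{
    std::size_t offset;
    std::size_t linear(std::size_t i) const { return i + offset; }
};

int main()
{
    toy_descriptor d{3};
    auto body = [=](std::size_t i) {
        // 'd' is a const copy here because the lambda is not 'mutable';
        // without the const on linear() this call would not compile.
        return d.linear(i);
    };
    return static_cast<int>(body(0));
}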
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
new file mode 100644
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP

#include <hip/hip_runtime.h>

namespace migraph {
namespace gpu {
namespace device {

struct index
{
    std::size_t global;
    std::size_t local;
    std::size_t group;
};

template <class F>
__global__ void launcher(F f)
{
    index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
    f(idx);
}

auto launch(std::size_t global, std::size_t local)
{
    return [=](auto f) {
        assert(local > 0);
        assert(global > 0);
        using f_type = decltype(f);
        dim3 nblocks(global / local);
        dim3 nthreads(local);
        hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, nullptr, f);
    };
}

auto gs_launch(std::size_t n, std::size_t local = 512)
{
    std::size_t groups  = 1 + n / local;
    std::size_t nglobal = std::min<std::size_t>(512, groups) * local;

    return [=](auto f) {
        launch(nglobal, local)([=](auto idx) {
            for(size_t i = idx.global; i < n; i += nglobal)
            {
                f(i);
            }
        });
    };
}

} // namespace device
} // namespace gpu
} // namespace migraph

#endif
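For context on how the new gs_launch helper is meant to be used: it computes a bounded grid size from n and wraps the grid-stride loop, so callers only supply the per-element body, as contiguous.cpp now does. A minimal sketch under that assumption (the copy function and its buffers are illustrative, not part of this commit):

// Illustrative only: an element-wise copy driven by gs_launch from this header.
#include <cstddef>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

void copy_elements(const float* src, float* dst, std::size_t n)
{
    // gs_launch(n) runs f(i) for every i < n via a grid-stride loop,
    // using at most 512 groups of 512 threads.
    gs_launch(n)([=](auto i) { dst[i] = src[i]; });
}

} // namespace device
} // namespace gpu
} // namespace migraph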