gaoqiong / MIGraphX · Commits

Commit cff1144d authored Aug 23, 2018 by Paul
Add add_relu device function
parent 48bf509a

Showing 10 changed files with 254 additions and 94 deletions
src/targets/gpu/CMakeLists.txt                                   +1  -0
src/targets/gpu/device/add_relu.cpp                              +17 -0
src/targets/gpu/device/contiguous.cpp                            +3  -90
src/targets/gpu/device/include/migraph/gpu/device/binary.hpp     +63 -0
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp     +2  -2
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp     +90 -0
src/targets/gpu/device/include/migraph/gpu/device/unary.hpp      +58 -0
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp          +17 -0
src/targets/gpu/include/migraph/gpu/device/contiguous.hpp        +1  -1
src/targets/gpu/lowering.cpp                                     +2  -1
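Taken together, the commit does two things: it adds a fused elementwise add + ReLU device function for the GPU target, and it factors the generic indexing helpers out of contiguous.cpp into reusable headers (tensor.hpp, unary.hpp, binary.hpp) so that other elementwise kernels can share them. The semantics of the new function are simply out[i] = max(0, x[i] + y[i]); fusing the two steps makes one pass over memory instead of materializing the intermediate sum. A minimal host-side reference of those semantics in plain C++ (the function name is illustrative, not part of the commit):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Reference semantics of the fused op: out[i] = max(0, x[i] + y[i]).
std::vector<float> add_relu_reference(const std::vector<float>& x, const std::vector<float>& y)
{
    assert(x.size() == y.size());
    std::vector<float> out(x.size());
    for(std::size_t i = 0; i < x.size(); ++i)
        out[i] = std::max(0.0f, x[i] + y[i]);
    return out;
}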
src/targets/gpu/CMakeLists.txt

...
@@ -11,6 +11,7 @@ if(NOT TARGET MIOpen)
 endif()
 add_library(migraph_device
+    device/add_relu.cpp
     device/contiguous.cpp
 )
 rocm_clang_tidy_check(migraph_device)
...
src/targets/gpu/device/add_relu.cpp (new file)

#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/binary.hpp>

namespace migraph {
namespace gpu {
namespace device {

void add_relu(argument arg1, argument arg2, argument result)
{
    binary_standard(arg1, arg2, result, [](auto x, auto y) { return max(0, x + y); });
}

} // namespace device
} // namespace gpu
} // namespace migraph
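Note that add_relu calls binary_standard directly, so it assumes all three arguments use the standard (densely packed) layout. A layout-agnostic caller could instead go through the generic binary() wrapper added in binary.hpp below, which falls back to the strided path when the input is not standard. A hedged sketch of such a variant, reusing the commit's own helpers (the function name add_relu_any_layout is hypothetical, not in the commit):

void add_relu_any_layout(argument arg1, argument arg2, argument result)
{
    // binary() inspects arg1's shape and dispatches to binary_standard
    // or binary_nonstandard, at the cost of per-element index
    // translation in the nonstandard case.
    binary(arg1, arg2, result, [](auto x, auto y) { return max(0, x + y); });
}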
src/targets/gpu/device/contiguous.cpp

 #include <migraph/gpu/device/contiguous.hpp>
-#include <migraph/gpu/device/launch.hpp>
+#include <migraph/gpu/device/unary.hpp>

 namespace migraph {
 namespace gpu {
 namespace device {

-template <class F>
-void visit_tensor_size(std::size_t n, F f)
-{
-    switch(n)
-    {
-    case 1: f(std::integral_constant<std::size_t, 1>{}); break;
-    case 2: f(std::integral_constant<std::size_t, 2>{}); break;
-    case 3: f(std::integral_constant<std::size_t, 3>{}); break;
-    case 4: f(std::integral_constant<std::size_t, 4>{}); break;
-    case 5: f(std::integral_constant<std::size_t, 5>{}); break;
-    default: throw std::runtime_error("Unknown tensor size");
-    }
-}
-
-template <size_t NDim>
-struct hip_index
-{
-    size_t d[NDim];
-    __device__ __host__ size_t& operator[](size_t i) { return d[i]; }
-    __device__ __host__ size_t operator[](size_t i) const { return d[i]; }
-};
-
-template <size_t NDim>
-struct hip_tensor_descriptor
-{
-    __device__ __host__ hip_tensor_descriptor() = default;
-    template <typename T, typename V>
-    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
-    {
-        for(size_t i = 0; i < NDim; i++)
-            lens[i] = lens_ext[i];
-        for(size_t i = 0; i < NDim; i++)
-            strides[i] = strides_ext[i];
-    }
-    __device__ __host__ hip_index<NDim> multi(size_t idx) const
-    {
-        hip_index<NDim> result{};
-        size_t tidx = idx;
-        for(size_t is = 0; is < NDim; is++)
-        {
-            result[is] = tidx / strides[is];
-            tidx       = tidx % strides[is];
-        }
-        return result;
-    }
-    __device__ __host__ size_t linear(hip_index<NDim> s) const
-    {
-        size_t idx = 0;
-        for(size_t i = 0; i < NDim; i++)
-            idx += s[i] * strides[i];
-        return idx;
-    }
-    size_t lens[NDim]    = {};
-    size_t strides[NDim] = {};
-};
-
-void contiguous(shape output_shape, argument arg, argument result)
-{
-    visit_all(result, arg)([&](auto output, auto input) {
-        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
-            const auto& s = arg.get_shape();
-            hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
-            hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
-            auto* a  = input.data();
-            auto* at = output.data();
-            gs_launch(s.elements())([=](auto i) {
-                size_t lidx = a_desc.linear(at_desc.multi(i));
-                at[i]       = a[lidx];
-            });
-        });
-    });
-}
+void contiguous(argument arg, argument result)
+{
+    unary_nonstandard(arg, result, [](auto x) { return x; });
+}

 } // namespace device
 } // namespace gpu
 } // namespace migraph
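The hand-rolled descriptor code removed here moves, unchanged, into tensor.hpp below, and the contiguous copy collapses into the identity unary. A shape is "standard" when its strides are the packed row-major ones, so contiguous produces such a layout from any strided input. A small sketch of how packed strides relate to lengths, in plain C++ (packed_strides is a hypothetical helper, not a MIGraphX API):

#include <cstddef>
#include <vector>

// Packed row-major ("standard") strides: the last dimension is
// contiguous and each earlier stride is the product of the
// faster-varying lengths. E.g. lens {2, 3, 4} -> strides {12, 4, 1}.
std::vector<std::size_t> packed_strides(const std::vector<std::size_t>& lens)
{
    if(lens.empty())
        return {};
    std::vector<std::size_t> strides(lens.size(), 1);
    for(std::size_t i = lens.size() - 1; i > 0; --i)
        strides[i - 1] = strides[i] * lens[i];
    return strides;
}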
src/targets/gpu/device/include/migraph/gpu/device/binary.hpp (new file)

#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_BINARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_BINARY_HPP

#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

template <class F>
void binary(argument x, argument y, argument result, F f)
{
    if(x.get_shape().standard())
        binary_standard(x, y, result, f);
    else
        binary_nonstandard(x, y, result, f);
}

template <class F>
void binary_nonstandard(argument x, argument y, argument result, F f)
{
    auto output_shape = result.get_shape();
    auto input_shape  = x.get_shape();
    visit_all(result, x, y)([&](auto output, auto input1, auto input2) {
        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> x_desc(x.get_shape().lens(), x.get_shape().strides());
            hip_tensor_descriptor<ndim> y_desc(y.get_shape().lens(), y.get_shape().strides());
            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
            auto* xp   = input1.data();
            auto* yp   = input2.data();
            auto* outp = output.data();
            gs_launch(input_shape.elements())([=](auto i) {
                auto outidx = out_desc.multi(i);
                size_t xidx = x_desc.linear(outidx);
                size_t yidx = y_desc.linear(outidx);
                outp[i]     = f(xp[xidx], yp[yidx]);
            });
        });
    });
}

template <class F>
void binary_standard(argument x, argument y, argument result, F f)
{
    assert(x.get_shape().elements() == y.get_shape().elements());
    auto output_shape = result.get_shape();
    auto input_shape  = x.get_shape();
    visit_all(result, x, y)([&](auto output, auto input1, auto input2) {
        auto* xp   = input1.data();
        auto* yp   = input2.data();
        auto* outp = output.data();
        gs_launch(input_shape.elements())([=](auto i) { outp[i] = f(xp[i], yp[i]); });
    });
}

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
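The nonstandard path recovers the multi-dimensional coordinate from the output's linear index (out_desc.multi) and re-linearizes it through each input's strides (x_desc.linear, y_desc.linear); mapping the same coordinate through different stride vectors is what translates between layouts. The division/modulo in multi() only recovers coordinates when the descriptor's strides are the packed row-major ones, which holds for the standard output shape. A self-contained sketch of the round trip (names here are illustrative):

#include <array>
#include <cstddef>

constexpr std::size_t ndim = 3;

// Recover a coordinate from a linear index, assuming packed row-major
// strides, e.g. lens {2, 3, 4} -> strides {12, 4, 1}.
std::array<std::size_t, ndim> multi_from_linear(std::size_t idx,
                                                const std::array<std::size_t, ndim>& strides)
{
    std::array<std::size_t, ndim> coord{};
    for(std::size_t i = 0; i < ndim; ++i)
    {
        coord[i] = idx / strides[i];
        idx      = idx % strides[i];
    }
    return coord;
}

// Re-linearize the coordinate through (possibly different) strides.
std::size_t linear_from_multi(const std::array<std::size_t, ndim>& coord,
                              const std::array<std::size_t, ndim>& strides)
{
    std::size_t idx = 0;
    for(std::size_t i = 0; i < ndim; ++i)
        idx += coord[i] * strides[i];
    return idx;
}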
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp

...
@@ -21,7 +21,7 @@ __global__ void launcher(F f)
     f(idx);
 }
 
-auto launch(std::size_t global, std::size_t local)
+inline auto launch(std::size_t global, std::size_t local)
 {
     return [=](auto f) {
         assert(local > 0);
...
@@ -33,7 +33,7 @@ auto launch(std::size_t global, std::size_t local)
     };
 }
 
-auto gs_launch(std::size_t n, std::size_t local = 512)
+inline auto gs_launch(std::size_t n, std::size_t local = 512)
 {
     std::size_t groups  = 1 + n / local;
     std::size_t nglobal = std::min<std::size_t>(512, groups) * local;
...
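The only change here is adding inline. These functions are defined in a header that was previously included by a single translation unit; now that binary.hpp and unary.hpp pull it into both contiguous.cpp and the new add_relu.cpp, each .cpp would emit its own external definition and linking would fail with multiple-definition errors, so inline is required by the one-definition rule. On the sizing arithmetic: groups = 1 + n / local over-allocates by up to one workgroup, and nglobal = min(512, groups) * local caps the grid, so (presumably, since the kernel body is elided here) each thread strides through indices gid, gid + nglobal, ... below n. A host simulation of that grid-stride coverage, under that assumption:

#include <algorithm>
#include <cstddef>
#include <vector>

// Simulate gs_launch's grid-stride loop: nglobal "threads", each
// handling indices gid, gid + nglobal, gid + 2*nglobal, ... < n.
std::vector<int> coverage(std::size_t n, std::size_t local = 512)
{
    std::size_t groups  = 1 + n / local;
    std::size_t nglobal = std::min<std::size_t>(512, groups) * local;
    std::vector<int> hits(n, 0);
    for(std::size_t gid = 0; gid < nglobal; ++gid)
        for(std::size_t i = gid; i < n; i += nglobal)
            hits[i]++; // every index below n is visited exactly once
    return hits;
}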
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp (new file)

#ifndef MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP

#include <hip/hip_runtime.h>

namespace migraph {
namespace gpu {
namespace device {

template <class F>
void visit_tensor_size(std::size_t n, F f)
{
    switch(n)
    {
    case 1: f(std::integral_constant<std::size_t, 1>{}); break;
    case 2: f(std::integral_constant<std::size_t, 2>{}); break;
    case 3: f(std::integral_constant<std::size_t, 3>{}); break;
    case 4: f(std::integral_constant<std::size_t, 4>{}); break;
    case 5: f(std::integral_constant<std::size_t, 5>{}); break;
    default: throw std::runtime_error("Unknown tensor size");
    }
}

template <size_t NDim>
struct hip_index
{
    size_t d[NDim];
    __device__ __host__ size_t& operator[](size_t i) { return d[i]; }
    __device__ __host__ size_t operator[](size_t i) const { return d[i]; }
};

template <size_t NDim>
struct hip_tensor_descriptor
{
    __device__ __host__ hip_tensor_descriptor() = default;
    template <typename T, typename V>
    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
    {
        for(size_t i = 0; i < NDim; i++)
            lens[i] = lens_ext[i];
        for(size_t i = 0; i < NDim; i++)
            strides[i] = strides_ext[i];
    }
    __device__ __host__ hip_index<NDim> multi(size_t idx) const
    {
        hip_index<NDim> result{};
        size_t tidx = idx;
        for(size_t is = 0; is < NDim; is++)
        {
            result[is] = tidx / strides[is];
            tidx       = tidx % strides[is];
        }
        return result;
    }
    __device__ __host__ size_t linear(hip_index<NDim> s) const
    {
        size_t idx = 0;
        for(size_t i = 0; i < NDim; i++)
            idx += s[i] * strides[i];
        return idx;
    }
    size_t lens[NDim]    = {};
    size_t strides[NDim] = {};
};

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
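visit_tensor_size is the bridge from a runtime rank to a compile-time constant: the switch instantiates the callback once per supported rank with a std::integral_constant, which converts to its value in constant expressions, so hip_tensor_descriptor<ndim> can take the rank as a non-type template parameter even though the rank arrives at run time. A standalone sketch of the same pattern (dispatch_rank and rank_squared are illustrative names, not part of the commit):

#include <cstddef>
#include <stdexcept>
#include <type_traits>

template <std::size_t N>
constexpr std::size_t rank_squared() { return N * N; }

template <class F>
std::size_t dispatch_rank(std::size_t n, F f)
{
    // One instantiation of f per supported rank, exactly like
    // visit_tensor_size above.
    switch(n)
    {
    case 1: return f(std::integral_constant<std::size_t, 1>{});
    case 2: return f(std::integral_constant<std::size_t, 2>{});
    default: throw std::runtime_error("Unsupported rank");
    }
}

// dispatch_rank(2, [](auto r) { return rank_squared<decltype(r)::value>(); })
// returns 4: decltype(r)::value is a compile-time constant selected by a
// run-time switch.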
src/targets/gpu/device/include/migraph/gpu/device/unary.hpp (new file)

#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_UNARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_UNARY_HPP

#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

template <class F>
void unary(argument x, argument result, F f)
{
    if(x.get_shape().standard())
        unary_standard(x, result, f);
    else
        unary_nonstandard(x, result, f);
}

template <class F>
void unary_nonstandard(argument x, argument result, F f)
{
    auto output_shape = result.get_shape();
    auto input_shape  = x.get_shape();
    visit_all(result, x)([&](auto output, auto input) {
        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> x_desc(input_shape.lens(), input_shape.strides());
            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
            auto* xp   = input.data();
            auto* outp = output.data();
            gs_launch(input_shape.elements())([=](auto i) {
                size_t xidx = x_desc.linear(out_desc.multi(i));
                outp[i]     = f(xp[xidx]);
            });
        });
    });
}

template <class F>
void unary_standard(argument x, argument result, F f)
{
    auto output_shape = result.get_shape();
    auto input_shape  = x.get_shape();
    visit_all(result, x)([&](auto output, auto input) {
        auto* xp   = input.data();
        auto* outp = output.data();
        gs_launch(input_shape.elements())([=](auto i) { outp[i] = f(xp[i]); });
    });
}

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
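With the identity lambda, unary_nonstandard is exactly the gather that makes a strided tensor contiguous, which is why contiguous.cpp now reduces to one line: the output is packed, and each output position pulls the input element addressed through the input's own strides. A concrete 2-D sketch of that gather for a transposed view (make_contiguous_2d is a hypothetical helper):

#include <cstddef>
#include <vector>

// Copy a strided 2-D view into packed row-major storage. The input is
// read through its own strides; the output index is packed.
std::vector<float> make_contiguous_2d(const std::vector<float>& in,
                                      std::size_t rows, std::size_t cols,
                                      std::size_t stride0, std::size_t stride1)
{
    std::vector<float> out(rows * cols);
    for(std::size_t r = 0; r < rows; ++r)
        for(std::size_t c = 0; c < cols; ++c)
            out[r * cols + c] = in[r * stride0 + c * stride1];
    return out;
}

// A transposed view of a 2x3 row-major buffer has lens {3, 2} and
// strides {1, 3}:  make_contiguous_2d(buf, 3, 2, 1, 3);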
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp (new file)

#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP

#include <migraph/argument.hpp>

namespace migraph {
namespace gpu {
namespace device {

void add_relu(argument arg1, argument arg2, argument result);

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
src/targets/gpu/include/migraph/gpu/device/contiguous.hpp

...
@@ -7,7 +7,7 @@ namespace migraph {
 namespace gpu {
 namespace device {
 
-void contiguous(shape output_shape, argument arg, argument result);
+void contiguous(argument arg, argument result);
 
 } // namespace device
 } // namespace gpu
...
src/targets/gpu/lowering.cpp

...
@@ -252,7 +252,8 @@ struct miopen_contiguous
 {
     assert(output_shape == args[1].get_shape());
     assert(output_shape.standard());
-    device::contiguous(std::move(output_shape), args.at(0), args.at(1));
+    (void)output_shape;
+    device::contiguous(args.at(0), args.at(1));
     return args.at(1);
 }
...
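The new contiguous overload no longer takes the shape, but output_shape is still needed by the two asserts. Since assert expands to nothing under NDEBUG, the (void)output_shape cast keeps release builds free of unused-variable warnings. The same idiom in isolation (names here are illustrative):

#include <cassert>

// A value used only inside assert() becomes unused when NDEBUG strips
// the assertion; casting it to void silences the compiler warning.
void check_match(int computed, int expected)
{
    assert(computed == expected);
    (void)computed;
    (void)expected;
}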