Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
d9170e2d
Commit
d9170e2d
authored
Sep 12, 2018
by
Paul
Browse files
Merge branch 'master' into im2col_cpu
parents
674ea92d
9fee0fe4
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
624 additions
and
98 deletions
+624
-98
src/program.cpp
src/program.cpp
+1
-0
src/shape.cpp
src/shape.cpp
+14
-9
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+1
-1
src/targets/gpu/device/add.cpp
src/targets/gpu/device/add.cpp
+15
-0
src/targets/gpu/device/add_relu.cpp
src/targets/gpu/device/add_relu.cpp
+2
-3
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
+10
-2
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
+205
-23
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp
+6
-6
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+5
-2
src/targets/gpu/include/migraph/gpu/device/add.hpp
src/targets/gpu/include/migraph/gpu/device/add.hpp
+17
-0
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp
+1
-1
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+21
-3
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+5
-2
test/eliminate_allocation_test.cpp
test/eliminate_allocation_test.cpp
+109
-0
test/gpu/literal.cpp
test/gpu/literal.cpp
+18
-0
test/gpu/miopen.cpp
test/gpu/miopen.cpp
+120
-41
test/operation.cpp
test/operation.cpp
+2
-0
test/shape_test.cpp
test/shape_test.cpp
+72
-5
No files found.
src/program.cpp
View file @
d9170e2d
...
@@ -179,6 +179,7 @@ instruction_ref program::add_outline(const shape& s)
...
@@ -179,6 +179,7 @@ instruction_ref program::add_outline(const shape& s)
instruction_ref
program
::
add_parameter
(
std
::
string
name
,
shape
s
)
instruction_ref
program
::
add_parameter
(
std
::
string
name
,
shape
s
)
{
{
assert
(
get_parameter_shape
(
name
)
==
shape
{});
impl
->
instructions
.
push_front
({
builtin
::
param
{
std
::
move
(
name
)},
std
::
move
(
s
),
{}});
impl
->
instructions
.
push_front
({
builtin
::
param
{
std
::
move
(
name
)},
std
::
move
(
s
),
{}});
return
impl
->
instructions
.
begin
();
return
impl
->
instructions
.
begin
();
}
}
...
...
src/shape.cpp
View file @
d9170e2d
...
@@ -116,15 +116,20 @@ std::size_t shape::index(std::size_t i) const
...
@@ -116,15 +116,20 @@ std::size_t shape::index(std::size_t i) const
if
(
this
->
standard
())
if
(
this
->
standard
())
return
i
;
return
i
;
else
else
return
std
::
inner_product
(
this
->
lens
().
begin
(),
{
this
->
lens
().
end
(),
std
::
size_t
s
=
1
;
this
->
strides
().
begin
(),
std
::
size_t
result
=
0
;
std
::
size_t
{
0
},
for
(
std
::
size_t
j
=
0
;
j
<
this
->
lens
().
size
();
j
++
)
std
::
plus
<
std
::
size_t
>
{},
{
[
&
](
std
::
size_t
len
,
std
::
size_t
stride
)
{
const
std
::
size_t
k
=
this
->
lens
().
size
()
-
j
-
1
;
assert
(
stride
>
0
and
len
>
0
);
const
std
::
size_t
stride
=
this
->
strides
()[
k
];
return
((
i
/
stride
)
%
len
)
*
stride
;
const
std
::
size_t
len
=
this
->
lens
()[
k
];
});
const
std
::
size_t
idx
=
(
i
%
(
s
*
len
))
/
s
;
result
+=
stride
*
idx
;
s
*=
len
;
}
return
result
;
}
}
}
bool
shape
::
packed
()
const
{
return
this
->
elements
()
==
this
->
element_space
();
}
bool
shape
::
packed
()
const
{
return
this
->
elements
()
==
this
->
element_space
();
}
...
...
src/targets/gpu/CMakeLists.txt
View file @
d9170e2d
...
@@ -11,6 +11,7 @@ if(NOT TARGET MIOpen)
...
@@ -11,6 +11,7 @@ if(NOT TARGET MIOpen)
endif
()
endif
()
add_library
(
migraph_device
add_library
(
migraph_device
device/add.cpp
device/add_relu.cpp
device/add_relu.cpp
device/contiguous.cpp
device/contiguous.cpp
)
)
...
@@ -20,7 +21,6 @@ target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRE
...
@@ -20,7 +21,6 @@ target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRE
target_include_directories
(
migraph_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
target_include_directories
(
migraph_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
add_library
(
migraph_gpu
add_library
(
migraph_gpu
eliminate_allocation.cpp
eliminate_workspace.cpp
eliminate_workspace.cpp
fuse_ops.cpp
fuse_ops.cpp
hip.cpp
hip.cpp
...
...
src/targets/gpu/device/add.cpp
0 → 100644
View file @
d9170e2d
#include <migraph/gpu/device/add.hpp>
#include <migraph/gpu/device/nary.hpp>
namespace
migraph
{
namespace
gpu
{
namespace
device
{
void
add
(
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
nary
(
result
,
arg1
,
arg2
)([](
auto
x
,
auto
y
)
{
return
x
+
y
;
});
}
}
// namespace device
}
// namespace gpu
}
// namespace migraph
src/targets/gpu/device/add_relu.cpp
View file @
d9170e2d
...
@@ -5,10 +5,9 @@ namespace migraph {
...
@@ -5,10 +5,9 @@ namespace migraph {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
add_relu
(
argument
result
,
argument
arg1
,
argument
arg2
)
void
add_relu
(
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
nary_standard
(
std
::
move
(
result
),
std
::
move
(
arg1
),
std
::
move
(
arg2
))(
nary
(
result
,
arg1
,
arg2
)([](
auto
x
,
auto
y
)
{
return
std
::
max
<
decltype
(
x
+
y
)
>
(
0
,
x
+
y
);
});
[](
auto
x
,
auto
y
)
{
return
max
(
0
,
x
+
y
);
});
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
View file @
d9170e2d
...
@@ -33,10 +33,10 @@ inline auto launch(std::size_t global, std::size_t local)
...
@@ -33,10 +33,10 @@ inline auto launch(std::size_t global, std::size_t local)
};
};
}
}
inline
auto
gs_launch
(
std
::
size_t
n
,
std
::
size_t
local
=
512
)
inline
auto
gs_launch
(
std
::
size_t
n
,
std
::
size_t
local
=
1024
)
{
{
std
::
size_t
groups
=
1
+
n
/
local
;
std
::
size_t
groups
=
1
+
n
/
local
;
std
::
size_t
nglobal
=
std
::
min
<
std
::
size_t
>
(
512
,
groups
)
*
local
;
std
::
size_t
nglobal
=
std
::
min
<
std
::
size_t
>
(
256
,
groups
)
*
local
;
return
[
=
](
auto
f
)
{
return
[
=
](
auto
f
)
{
launch
(
nglobal
,
local
)([
=
](
auto
idx
)
{
launch
(
nglobal
,
local
)([
=
](
auto
idx
)
{
...
@@ -48,6 +48,14 @@ inline auto gs_launch(std::size_t n, std::size_t local = 512)
...
@@ -48,6 +48,14 @@ inline auto gs_launch(std::size_t n, std::size_t local = 512)
};
};
}
}
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#undef tile_static
#define MIGRAPH_DEVICE_SHARED __attribute__((tile_static))
#else
#define MIGRAPH_DEVICE_SHARED __shared__
#endif
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace migraph
}
// namespace migraph
...
...
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
View file @
d9170e2d
...
@@ -10,16 +10,25 @@ namespace migraph {
...
@@ -10,16 +10,25 @@ namespace migraph {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
template
<
class
...
Arguments
>
template
<
class
T
>
auto
nary
(
argument
result
,
Arguments
...
args
)
using
vec4
=
T
__attribute__
((
ext_vector_type
(
4
)));
template
<
class
T
>
__device__
__host__
vec4
<
T
>*
as_vec4
(
T
*
x
)
{
{
return
[
=
](
auto
f
)
{
return
reinterpret_cast
<
vec4
<
T
>*>
(
x
);
if
(
all_of
({
args
...},
[](
const
shape
&
s
)
{
return
s
.
standard
();
}))
}
nary_standard
(
result
,
args
...)(
f
);
else
nary_nonstandard
(
result
,
args
...)(
f
);
};
template
<
class
T
>
__device__
__host__
T
*
as_pointer
(
vec4
<
T
>*
x
)
{
return
reinterpret_cast
<
T
*>
(
x
);
}
template
<
class
...
Ts
>
auto
pack_vec4
(
Ts
...
xs
)
{
return
[
=
](
auto
f
,
std
::
size_t
n
)
{
return
f
(
as_vec4
(
xs
)[
n
]...);
};
}
}
template
<
class
F
,
class
...
Arguments
>
template
<
class
F
,
class
...
Arguments
>
...
@@ -28,14 +37,12 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
...
@@ -28,14 +37,12 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
const
auto
&
output_shape
=
result
.
get_shape
();
const
auto
&
output_shape
=
result
.
get_shape
();
visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
visit_tensor_size
(
output_shape
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
visit_tensor_size
(
output_shape
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
auto
data
=
make_sequence
(
auto
data
=
pack
(
std
::
make_pair
(
hip_tensor_descriptor
<
ndim
>
{
inputs
.
get_shape
().
lens
(),
std
::
make_pair
(
hip_tensor_descriptor
<
ndim
>
{
inputs
.
get_shape
()},
inputs
.
data
())...);
inputs
.
get_shape
().
strides
()},
hip_tensor_descriptor
<
ndim
>
out_desc
(
output_shape
);
inputs
.
data
())...);
hip_tensor_descriptor
<
ndim
>
out_desc
(
output_shape
.
lens
(),
output_shape
.
strides
());
auto
*
outp
=
output
.
data
();
auto
*
outp
=
output
.
data
();
gs_launch
(
output_shape
.
elements
())([
=
](
auto
i
)
{
gs_launch
(
output_shape
.
elements
())([
=
](
auto
i
)
{
data
([
&
](
auto
...
ps
)
{
data
([
&
](
auto
&&
...
ps
)
{
auto
outidx
=
out_desc
.
multi
(
i
);
auto
outidx
=
out_desc
.
multi
(
i
);
outp
[
i
]
=
f
(
ps
.
second
[
ps
.
first
.
linear
(
outidx
)]...);
outp
[
i
]
=
f
(
ps
.
second
[
ps
.
first
.
linear
(
outidx
)]...);
});
});
...
@@ -44,24 +51,199 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
...
@@ -44,24 +51,199 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
});
});
}
}
template
<
class
F
>
void
binary_broadcast_vec_impl
(
F
f
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
const
auto
&
output_shape
=
result
.
get_shape
();
const
auto
&
b_shape
=
arg2
.
get_shape
();
auto
bdim
=
std
::
distance
(
b_shape
.
strides
().
begin
(),
std
::
find_if
(
b_shape
.
strides
().
begin
(),
b_shape
.
strides
().
end
(),
[](
auto
x
)
{
return
x
!=
0
;
}));
auto
bdim_len
=
output_shape
.
lens
()[
bdim
];
auto
bdim_stride
=
output_shape
.
strides
()[
bdim
];
auto
bdim_next_stride
=
bdim_stride
*
bdim_len
;
visit_all
(
result
,
arg1
,
arg2
)([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
using
type
=
std
::
remove_cv_t
<
typename
decltype
(
output
)
::
value_type
>
;
auto
*
xp
=
as_vec4
(
input1
.
data
());
auto
*
yp
=
as_vec4
(
input2
.
data
());
auto
*
outp
=
as_vec4
(
output
.
data
());
const
std
::
size_t
vec_size
=
4
;
const
std
::
size_t
nlocal
=
1024
;
const
std
::
size_t
nglobal
=
256
*
nlocal
;
const
std
::
size_t
n
=
output
.
size
()
/
vec_size
;
const
std
::
size_t
bdim_vec_len
=
bdim_len
/
vec_size
;
launch
(
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPH_DEVICE_SHARED
vec4
<
type
>
buffer
[
2048
/
vec_size
];
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
{
buffer
[
i
]
=
yp
[
i
];
}
__syncthreads
();
auto
*
bp
=
as_pointer
(
buffer
);
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
{
auto
bidx
=
((
i
*
vec_size
)
%
bdim_next_stride
)
/
bdim_stride
;
auto
b
=
bp
[
bidx
];
vec4
<
type
>
x
=
xp
[
i
];
vec4
<
type
>
out
=
outp
[
i
];
for
(
std
::
size_t
j
=
0
;
j
<
vec_size
;
j
++
)
{
out
[
j
]
=
f
(
x
[
j
],
b
);
}
outp
[
i
]
=
out
;
}
});
});
}
template
<
class
F
>
void
binary_broadcast_impl
(
F
f
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
const
auto
&
output_shape
=
result
.
get_shape
();
const
auto
&
b_shape
=
arg2
.
get_shape
();
auto
bdim
=
std
::
distance
(
b_shape
.
strides
().
begin
(),
std
::
find_if
(
b_shape
.
strides
().
begin
(),
b_shape
.
strides
().
end
(),
[](
auto
x
)
{
return
x
!=
0
;
}));
auto
bdim_len
=
output_shape
.
lens
()[
bdim
];
auto
bdim_stride
=
output_shape
.
strides
()[
bdim
];
auto
bdim_next_stride
=
bdim_stride
*
bdim_len
;
visit_all
(
result
,
arg1
,
arg2
)([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
using
type
=
std
::
remove_cv_t
<
typename
decltype
(
output
)
::
value_type
>
;
auto
*
xp
=
input1
.
data
();
auto
*
yp
=
input2
.
data
();
auto
*
outp
=
output
.
data
();
const
std
::
size_t
nlocal
=
1024
;
const
std
::
size_t
nglobal
=
256
*
nlocal
;
const
std
::
size_t
n
=
output
.
size
();
launch
(
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPH_DEVICE_SHARED
type
buffer
[
2048
];
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_len
;
i
+=
nlocal
)
{
buffer
[
i
]
=
yp
[
i
];
}
__syncthreads
();
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
{
auto
bidx
=
(
i
%
bdim_next_stride
)
/
bdim_stride
;
auto
b
=
buffer
[
bidx
];
type
x
=
xp
[
i
];
outp
[
i
]
=
f
(
x
,
b
);
}
});
});
}
template
<
class
F
,
class
...
Arguments
>
void
nary_standard_vec_impl
(
F
f
,
argument
result
,
Arguments
...
args
)
{
// assert(x.get_shape().elements() == y.get_shape().elements());
const
auto
&
output_shape
=
result
.
get_shape
();
visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
using
type
=
std
::
remove_cv_t
<
typename
decltype
(
output
)
::
value_type
>
;
const
std
::
size_t
vec_size
=
4
;
auto
data
=
pack_vec4
(
inputs
.
data
()...);
auto
*
outp
=
as_vec4
(
output
.
data
());
gs_launch
(
output_shape
.
elements
()
/
vec_size
)([
=
](
auto
i
)
{
vec4
<
type
>
out
=
outp
[
i
];
data
(
[
&
](
auto
...
xs
)
{
for
(
std
::
size_t
j
=
0
;
j
<
vec_size
;
j
++
)
{
out
[
j
]
=
f
(
xs
[
j
]...);
}
},
i
);
outp
[
i
]
=
out
;
});
});
}
template
<
class
F
,
class
...
Arguments
>
void
nary_standard_impl
(
F
f
,
argument
result
,
Arguments
...
args
)
{
// assert(x.get_shape().elements() == y.get_shape().elements());
const
auto
&
output_shape
=
result
.
get_shape
();
visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
auto
data
=
pack
(
inputs
.
data
()...);
auto
*
outp
=
output
.
data
();
gs_launch
(
output_shape
.
elements
())(
[
=
](
auto
i
)
{
data
([
&
](
auto
...
xps
)
{
outp
[
i
]
=
f
(
xps
[
i
]...);
});
});
});
}
template
<
class
F
,
class
...
Arguments
>
void
nary_impl
(
F
f
,
argument
result
,
Arguments
...
args
)
{
bool
standard
=
all_of
({
args
.
get_shape
()...},
[](
const
shape
&
s
)
{
return
s
.
standard
();
});
bool
packed
=
all_of
({
args
.
get_shape
()...},
[](
const
shape
&
s
)
{
return
s
.
packed
();
});
bool
same_shapes
=
all_of
({
args
.
get_shape
()...},
[
&
](
const
shape
&
s
)
{
return
s
==
result
.
get_shape
();
});
if
(
standard
or
(
packed
and
same_shapes
))
nary_standard_impl
(
f
,
result
,
args
...);
else
nary_nonstandard_impl
(
f
,
result
,
args
...);
}
template
<
class
...
Arguments
>
template
<
class
...
Arguments
>
auto
nary_nonstandard
(
argument
result
,
Arguments
...
args
)
auto
nary_nonstandard
(
argument
result
,
Arguments
...
args
)
{
{
return
[
=
](
auto
f
)
{
return
nary_nonstandard_impl
(
f
,
result
,
args
...);
};
return
[
=
](
auto
f
)
{
nary_nonstandard_impl
(
f
,
result
,
args
...);
};
}
}
template
<
class
...
Arguments
>
template
<
class
...
Arguments
>
auto
nary_standard
(
argument
result
,
Arguments
...
args
)
auto
nary_standard
(
argument
result
,
Arguments
...
args
)
{
return
[
=
](
auto
f
)
{
nary_standard_impl
(
f
,
result
,
args
...);
};
}
template
<
class
...
Arguments
>
auto
nary
(
argument
result
,
Arguments
...
args
)
{
return
[
=
](
auto
f
)
{
nary_impl
(
f
,
result
,
args
...);
};
}
inline
auto
nary
(
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
return
[
=
](
auto
f
)
{
return
[
=
](
auto
f
)
{
// assert(x.get_shape().elements() == y.get_shape().elements());
// TODO: Check result and arg1 shape is the same
const
auto
&
output_shape
=
result
.
get_shape
();
if
(
arg1
.
get_shape
().
standard
()
and
arg2
.
get_shape
().
broadcasted
())
visit_all
(
result
,
args
...)([
&
](
auto
output
,
auto
...
inputs
)
{
{
auto
data
=
make_sequence
(
inputs
.
data
()...);
auto
not_zero
=
[](
auto
x
)
{
return
x
!=
0
;
};
auto
*
outp
=
output
.
data
();
const
auto
&
strides
=
arg2
.
get_shape
().
strides
();
gs_launch
(
output_shape
.
elements
())(
auto
b_it
=
std
::
find_if
(
strides
.
begin
(),
strides
.
end
(),
not_zero
);
[
=
](
auto
i
)
{
data
([
&
](
auto
...
xps
)
{
outp
[
i
]
=
f
(
xps
[
i
]...);
});
});
auto
b_idx
=
std
::
distance
(
strides
.
begin
(),
b_it
);
});
auto
b_len
=
result
.
get_shape
().
lens
()[
b_idx
];
auto
b_stride
=
result
.
get_shape
().
strides
()[
b_idx
];
assert
(
arg2
.
get_shape
().
lens
()[
b_idx
]
==
b_len
);
if
(
b_len
<=
2048
and
std
::
none_of
(
std
::
next
(
b_it
),
strides
.
end
(),
not_zero
))
{
const
bool
divisible_by_4
=
(
b_len
%
4
==
0
)
and
(
b_stride
%
4
==
0
)
and
(
arg1
.
get_shape
().
elements
()
%
4
==
0
);
if
(
divisible_by_4
)
binary_broadcast_vec_impl
(
f
,
result
,
arg1
,
arg2
);
else
binary_broadcast_impl
(
f
,
result
,
arg1
,
arg2
);
return
;
}
}
nary_impl
(
f
,
result
,
arg1
,
arg2
);
};
};
}
}
...
...
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp
View file @
d9170e2d
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
#define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h>
#include <hip/hip_runtime.h>
#include <migraph/functional.hpp>
namespace
migraph
{
namespace
migraph
{
namespace
gpu
{
namespace
gpu
{
...
@@ -53,14 +54,13 @@ template <size_t NDim>
...
@@ -53,14 +54,13 @@ template <size_t NDim>
struct
hip_tensor_descriptor
struct
hip_tensor_descriptor
{
{
__device__
__host__
hip_tensor_descriptor
()
=
default
;
__device__
__host__
hip_tensor_descriptor
()
=
default
;
template
<
typename
T
,
typename
V
>
__device__
__host__
hip_tensor_descriptor
(
const
T
&
lens_ext
,
const
V
&
strides_ext
)
hip_tensor_descriptor
(
const
shape
&
s
)
{
{
for
(
size_t
i
=
0
;
i
<
NDim
;
i
++
)
std
::
copy
(
s
.
lens
().
begin
(),
s
.
lens
().
end
(),
lens
);
lens
[
i
]
=
lens_ext
[
i
];
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
);
for
(
size_t
i
=
0
;
i
<
NDim
;
i
++
)
strides
[
i
]
=
strides_ext
[
i
];
}
}
__device__
__host__
hip_index
<
NDim
>
multi
(
size_t
idx
)
const
__device__
__host__
hip_index
<
NDim
>
multi
(
size_t
idx
)
const
{
{
hip_index
<
NDim
>
result
{};
hip_index
<
NDim
>
result
{};
...
...
src/targets/gpu/fuse_ops.cpp
View file @
d9170e2d
...
@@ -12,7 +12,7 @@ struct hip_add_relu
...
@@ -12,7 +12,7 @@ struct hip_add_relu
std
::
string
name
()
const
{
return
"hip::add_relu"
;
}
std
::
string
name
()
const
{
return
"hip::add_relu"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
}.
has
(
3
).
standard
(
);
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
return
inputs
.
front
();
return
inputs
.
front
();
}
}
argument
compute
(
context
&
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
argument
compute
(
context
&
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
...
@@ -31,7 +31,10 @@ void fuse_ops::apply(program& p) const
...
@@ -31,7 +31,10 @@ void fuse_ops::apply(program& p) const
auto
add_ins
=
ins
->
arguments
.
front
();
auto
add_ins
=
ins
->
arguments
.
front
();
if
(
add_ins
->
op
.
name
()
!=
"gpu::add"
)
if
(
add_ins
->
op
.
name
()
!=
"gpu::add"
)
continue
;
continue
;
p
.
replace_instruction
(
ins
,
hip_add_relu
{},
add_ins
->
arguments
);
auto
args
=
add_ins
->
arguments
;
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
arguments
.
back
();
p
.
replace_instruction
(
ins
,
hip_add_relu
{},
args
);
}
}
}
}
...
...
src/targets/gpu/include/migraph/gpu/device/add.hpp
0 → 100644
View file @
d9170e2d
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
#include <migraph/argument.hpp>
namespace
migraph
{
namespace
gpu
{
namespace
device
{
void
add
(
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
);
}
// namespace device
}
// namespace gpu
}
// namespace migraph
#endif
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp
View file @
d9170e2d
...
@@ -8,7 +8,7 @@ namespace migraph {
...
@@ -8,7 +8,7 @@ namespace migraph {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
add_relu
(
argument
result
,
argument
arg1
,
argument
arg2
);
void
add_relu
(
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
);
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/lowering.cpp
View file @
d9170e2d
...
@@ -9,6 +9,7 @@
...
@@ -9,6 +9,7 @@
#include <migraph/gpu/hip.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/dfor.hpp>
#include <migraph/dfor.hpp>
#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/add.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/gpu/context.hpp>
...
@@ -168,6 +169,23 @@ struct miopen_pooling
...
@@ -168,6 +169,23 @@ struct miopen_pooling
}
}
};
};
struct
hip_add
{
std
::
string
name
()
const
{
return
"gpu::add"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
// check_shapes{inputs, *this}.has(3).standard();
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
return
inputs
.
at
(
0
);
}
argument
compute
(
context
&
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
add
(
args
[
2
],
args
[
0
],
args
[
1
]);
return
args
[
2
];
}
};
struct
miopen_add
struct
miopen_add
{
{
std
::
string
name
()
const
{
return
"gpu::add"
;
}
std
::
string
name
()
const
{
return
"gpu::add"
;
}
...
@@ -202,7 +220,7 @@ struct miopen_add
...
@@ -202,7 +220,7 @@ struct miopen_add
struct
miopen_gemm
struct
miopen_gemm
{
{
gemm
op
;
gemm
op
;
std
::
string
name
()
const
{
return
"gpu::
convolution
"
;
}
std
::
string
name
()
const
{
return
"gpu::
gemm
"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
...
@@ -337,7 +355,7 @@ struct miopen_apply
...
@@ -337,7 +355,7 @@ struct miopen_apply
instruction_ref
insert_allocation
(
instruction_ref
ins
,
const
shape
&
s
,
std
::
string
tag
=
""
)
instruction_ref
insert_allocation
(
instruction_ref
ins
,
const
shape
&
s
,
std
::
string
tag
=
""
)
{
{
if
(
ins
==
--
prog
->
end
())
if
(
ins
==
--
prog
->
end
()
and
tag
.
empty
()
)
{
{
return
prog
->
add_parameter
(
"output"
,
s
);
return
prog
->
add_parameter
(
"output"
,
s
);
}
}
...
@@ -390,7 +408,7 @@ struct miopen_apply
...
@@ -390,7 +408,7 @@ struct miopen_apply
{
{
auto
output
=
insert_allocation
(
ins
,
ins
->
result
);
auto
output
=
insert_allocation
(
ins
,
ins
->
result
);
return
prog
->
replace_instruction
(
return
prog
->
replace_instruction
(
ins
,
miopen
_add
{},
ins
->
arguments
.
at
(
0
),
ins
->
arguments
.
at
(
1
),
output
);
ins
,
hip
_add
{},
ins
->
arguments
.
at
(
0
),
ins
->
arguments
.
at
(
1
),
output
);
}
}
instruction_ref
apply_gemm
(
instruction_ref
ins
)
instruction_ref
apply_gemm
(
instruction_ref
ins
)
...
...
src/targets/gpu/target.cpp
View file @
d9170e2d
...
@@ -3,13 +3,14 @@
...
@@ -3,13 +3,14 @@
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/gpu/eliminate_workspace.hpp>
#include <migraph/gpu/eliminate_workspace.hpp>
#include <migraph/
gpu/
eliminate_allocation.hpp>
#include <migraph/eliminate_allocation.hpp>
#include <migraph/gpu/fuse_ops.hpp>
#include <migraph/gpu/fuse_ops.hpp>
#include <migraph/check_context.hpp>
#include <migraph/check_context.hpp>
#include <migraph/auto_contiguous.hpp>
#include <migraph/auto_contiguous.hpp>
#include <migraph/dead_code_elimination.hpp>
#include <migraph/dead_code_elimination.hpp>
#include <migraph/simplify_reshapes.hpp>
#include <migraph/simplify_reshapes.hpp>
#include <migraph/eliminate_contiguous.hpp>
#include <migraph/eliminate_contiguous.hpp>
#include <migraph/fwd_conv_batchnorm_rewrite.hpp>
namespace
migraph
{
namespace
migraph
{
namespace
gpu
{
namespace
gpu
{
...
@@ -20,6 +21,8 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
...
@@ -20,6 +21,8 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
// clang-format off
// clang-format off
return
return
{
{
dead_code_elimination
{},
fwd_conv_batchnorm_rewrite
{},
dead_code_elimination
{},
dead_code_elimination
{},
auto_contiguous
{},
auto_contiguous
{},
simplify_reshapes
{},
simplify_reshapes
{},
...
@@ -31,7 +34,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
...
@@ -31,7 +34,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
eliminate_contiguous
{},
eliminate_contiguous
{},
dead_code_elimination
{},
dead_code_elimination
{},
write_literals
{
&
ctx
},
write_literals
{
&
ctx
},
eliminate_allocation
{},
eliminate_allocation
{
""
},
check_context
<
context
>
{},
check_context
<
context
>
{},
dead_code_elimination
{}
dead_code_elimination
{}
};
};
...
...
test/eliminate_allocation_test.cpp
0 → 100644
View file @
d9170e2d
#include <migraph/eliminate_allocation.hpp>
#include <migraph/dead_code_elimination.hpp>
#include <migraph/operators.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
struct
eliminate_allocation_target
{
std
::
size_t
align
=
32
;
std
::
string
name
()
const
{
return
"eliminate_allocation"
;
}
std
::
vector
<
migraph
::
pass
>
get_passes
(
migraph
::
context
&
)
const
{
return
{
migraph
::
eliminate_allocation
{
"allocate"
,
align
},
migraph
::
dead_code_elimination
{}};
}
migraph
::
context
get_context
()
const
{
return
{};
}
};
struct
allocate
{
migraph
::
shape
s
{};
std
::
string
name
()
const
{
return
"allocate"
;
}
migraph
::
shape
compute_shape
(
const
std
::
vector
<
migraph
::
shape
>&
inputs
)
const
{
migraph
::
check_shapes
{
inputs
}.
has
(
0
);
return
s
;
}
migraph
::
argument
compute
(
migraph
::
context
&
,
const
migraph
::
shape
&
output_shape
,
const
std
::
vector
<
migraph
::
argument
>&
)
const
{
return
{
output_shape
};
}
};
void
basic
()
{
migraph
::
program
p
;
auto
a1
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
8
}}});
auto
p1
=
p
.
add_instruction
(
pass_op
{},
a1
);
auto
a2
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
40
}}});
auto
p2
=
p
.
add_instruction
(
pass_op
{},
a2
,
p1
);
auto
a3
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}}});
p
.
add_instruction
(
pass_op
{},
a3
,
p2
);
p
.
compile
(
eliminate_allocation_target
{});
EXPECT
(
p
.
get_shape
()
==
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}});
EXPECT
(
p
.
get_parameter_shape
(
"memory"
).
bytes
()
==
(
8
*
4
+
40
*
4
+
200
*
4
));
}
void
aligned
()
{
migraph
::
program
p
;
auto
a1
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
1
}}});
auto
p1
=
p
.
add_instruction
(
pass_op
{},
a1
);
auto
a2
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
2
}}});
auto
p2
=
p
.
add_instruction
(
pass_op
{},
a2
,
p1
);
auto
a3
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}}});
p
.
add_instruction
(
pass_op
{},
a3
,
p2
);
p
.
compile
(
eliminate_allocation_target
{});
EXPECT
(
p
.
get_shape
()
==
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}});
EXPECT
(
p
.
get_parameter_shape
(
"memory"
).
bytes
()
==
(
32
+
32
+
200
*
4
));
}
void
unaligned
()
{
migraph
::
program
p
;
auto
a1
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
1
}}});
auto
p1
=
p
.
add_instruction
(
pass_op
{},
a1
);
auto
a2
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
2
}}});
auto
p2
=
p
.
add_instruction
(
pass_op
{},
a2
,
p1
);
auto
a3
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}}});
p
.
add_instruction
(
pass_op
{},
a3
,
p2
);
p
.
compile
(
eliminate_allocation_target
{
1
});
EXPECT
(
p
.
get_shape
()
==
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}});
EXPECT
(
p
.
get_parameter_shape
(
"memory"
).
bytes
()
==
(
1
*
4
+
2
*
4
+
200
*
4
));
}
void
float_aligned
()
{
migraph
::
program
p
;
auto
a1
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
1
}}});
auto
p1
=
p
.
add_instruction
(
pass_op
{},
a1
);
auto
a2
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
2
}}});
auto
p2
=
p
.
add_instruction
(
pass_op
{},
a2
,
p1
);
auto
a3
=
p
.
add_instruction
(
allocate
{
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}}});
p
.
add_instruction
(
pass_op
{},
a3
,
p2
);
p
.
compile
(
eliminate_allocation_target
{
4
});
EXPECT
(
p
.
get_shape
()
==
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
200
}});
EXPECT
(
p
.
get_parameter_shape
(
"memory"
).
bytes
()
==
(
1
*
4
+
2
*
4
+
200
*
4
));
}
int
main
()
{
basic
();
aligned
();
unaligned
();
float_aligned
();
}
test/gpu/literal.cpp
0 → 100644
View file @
d9170e2d
#include <test.hpp>
#include <basic_ops.hpp>
#include <migraph/program.hpp>
#include <migraph/generate.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/hip.hpp>
void
gpu_literal_test
()
{
migraph
::
program
p
;
auto
lit
=
generate_literal
(
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
4
,
3
,
3
,
3
}});
p
.
add_literal
(
lit
);
p
.
compile
(
migraph
::
gpu
::
target
{});
auto
result
=
p
.
eval
({});
EXPECT
(
lit
==
migraph
::
gpu
::
from_gpu
(
result
));
}
int
main
()
{
gpu_literal_test
();
}
test/gpu/miopen.cpp
View file @
d9170e2d
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
#include <migraph/gpu/hip.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/type_name.hpp>
#include <migraph/type_name.hpp>
#include <migraph/verify.hpp>
#include <migraph/verify
_args
.hpp>
#include <miopen/miopen.h>
#include <miopen/miopen.h>
...
@@ -77,6 +77,12 @@ struct auto_print
...
@@ -77,6 +77,12 @@ struct auto_print
};
};
std
::
array
<
std
::
function
<
void
()
>
,
2
>
auto_print
::
handlers
=
{};
std
::
array
<
std
::
function
<
void
()
>
,
2
>
auto_print
::
handlers
=
{};
template
<
class
T
>
auto
get_hash
(
const
T
&
x
)
{
return
std
::
hash
<
T
>
{}(
x
);
}
void
compile_check
(
migraph
::
program
&
p
,
const
migraph
::
target
&
t
)
void
compile_check
(
migraph
::
program
&
p
,
const
migraph
::
target
&
t
)
{
{
auto
name
=
t
.
name
();
auto
name
=
t
.
name
();
...
@@ -100,7 +106,7 @@ migraph::argument run_cpu()
...
@@ -100,7 +106,7 @@ migraph::argument run_cpu()
migraph
::
program
::
parameter_map
m
;
migraph
::
program
::
parameter_map
m
;
for
(
auto
&&
x
:
p
.
get_parameter_shapes
())
for
(
auto
&&
x
:
p
.
get_parameter_shapes
())
{
{
m
[
x
.
first
]
=
migraph
::
generate_argument
(
x
.
second
);
m
[
x
.
first
]
=
migraph
::
generate_argument
(
x
.
second
,
get_hash
(
x
.
first
)
);
}
}
return
p
.
eval
(
m
);
return
p
.
eval
(
m
);
}
}
...
@@ -112,52 +118,15 @@ migraph::argument run_gpu()
...
@@ -112,52 +118,15 @@ migraph::argument run_gpu()
auto
p
=
v
.
create_program
();
auto
p
=
v
.
create_program
();
auto_print
pp
{
p
,
1
};
auto_print
pp
{
p
,
1
};
compile_check
(
p
,
migraph
::
gpu
::
target
{});
compile_check
(
p
,
migraph
::
gpu
::
target
{});
migraph
::
program
::
parameter_map
m
;
migraph
::
program
::
parameter_map
m
;
for
(
auto
&&
x
:
p
.
get_parameter_shapes
())
for
(
auto
&&
x
:
p
.
get_parameter_shapes
())
{
{
m
[
x
.
first
]
=
migraph
::
gpu
::
to_gpu
(
migraph
::
generate_argument
(
x
.
second
));
m
[
x
.
first
]
=
migraph
::
gpu
::
to_gpu
(
migraph
::
generate_argument
(
x
.
second
,
get_hash
(
x
.
first
)
));
}
}
EXPECT
(
bool
{
m
.
find
(
"output"
)
!=
m
.
end
()});
return
migraph
::
gpu
::
from_gpu
(
p
.
eval
(
m
));
return
migraph
::
gpu
::
from_gpu
(
p
.
eval
(
m
));
}
}
void
verify_args
(
const
std
::
string
&
name
,
const
migraph
::
argument
&
cpu_arg
,
const
migraph
::
argument
&
gpu_arg
)
{
visit_all
(
cpu_arg
,
gpu_arg
)([
&
](
auto
cpu
,
auto
gpu
)
{
if
(
not
migraph
::
verify_range
(
cpu
,
gpu
))
{
// TODO: Check for nans
std
::
cout
<<
"FAILED: "
<<
name
<<
std
::
endl
;
// std::cout << cpu << std::endl;
// std::cout << gpu << std::endl;
if
(
migraph
::
range_zero
(
cpu
))
std
::
cout
<<
"Cpu data is all zeros"
<<
std
::
endl
;
if
(
migraph
::
range_zero
(
gpu
))
std
::
cout
<<
"Gpu data is all zeros"
<<
std
::
endl
;
auto
idx
=
migraph
::
mismatch_idx
(
cpu
,
gpu
,
migraph
::
float_equal
);
if
(
idx
<
migraph
::
range_distance
(
cpu
))
{
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
cpu
[
idx
]
<<
" != "
<<
gpu
[
idx
]
<<
std
::
endl
;
}
auto
cpu_nan_idx
=
find_idx
(
cpu
,
migraph
::
not_finite
);
if
(
cpu_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in cpu at "
<<
cpu_nan_idx
<<
": "
<<
cpu
[
cpu_nan_idx
]
<<
std
::
endl
;
auto
gpu_nan_idx
=
find_idx
(
gpu
,
migraph
::
not_finite
);
if
(
gpu_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in gpu at "
<<
gpu_nan_idx
<<
": "
<<
gpu
[
gpu_nan_idx
]
<<
std
::
endl
;
}
});
}
template
<
class
V
>
template
<
class
V
>
void
verify_program
()
void
verify_program
()
{
{
...
@@ -210,6 +179,75 @@ struct test_add_broadcast
...
@@ -210,6 +179,75 @@ struct test_add_broadcast
}
}
};
};
struct
test_add_broadcast2
{
migraph
::
program
create_program
()
const
{
migraph
::
program
p
;
migraph
::
shape
s
{
migraph
::
shape
::
float_type
,
{
3
}};
auto
x
=
p
.
add_parameter
(
"x"
,
{
migraph
::
shape
::
float_type
,
{
2
,
3
,
4
}});
auto
y
=
p
.
add_parameter
(
"y"
,
{
migraph
::
shape
::
float_type
,
{
3
}});
auto
by
=
p
.
add_instruction
(
migraph
::
broadcast
{
1
},
x
,
y
);
p
.
add_instruction
(
migraph
::
add
{},
x
,
by
);
return
p
;
}
};
struct
test_add_broadcast3
{
migraph
::
program
create_program
()
const
{
migraph
::
program
p
;
migraph
::
shape
s
{
migraph
::
shape
::
float_type
,
{
3
}};
auto
x
=
p
.
add_parameter
(
"x"
,
{
migraph
::
shape
::
float_type
,
{
2
,
4
,
5
}});
auto
y
=
p
.
add_parameter
(
"y"
,
{
migraph
::
shape
::
float_type
,
{
4
}});
auto
by
=
p
.
add_instruction
(
migraph
::
broadcast
{
1
},
x
,
y
);
p
.
add_instruction
(
migraph
::
add
{},
x
,
by
);
return
p
;
}
};
struct
test_add_broadcast4
{
migraph
::
program
create_program
()
const
{
migraph
::
program
p
;
migraph
::
shape
s
{
migraph
::
shape
::
float_type
,
{
3
}};
auto
x
=
p
.
add_parameter
(
"x"
,
{
migraph
::
shape
::
float_type
,
{
2
,
3
,
5
}});
auto
y
=
p
.
add_parameter
(
"y"
,
{
migraph
::
shape
::
float_type
,
{
3
}});
auto
by
=
p
.
add_instruction
(
migraph
::
broadcast
{
1
},
x
,
y
);
p
.
add_instruction
(
migraph
::
add
{},
x
,
by
);
return
p
;
}
};
struct
test_add_broadcast5
{
migraph
::
program
create_program
()
const
{
migraph
::
program
p
;
migraph
::
shape
s
{
migraph
::
shape
::
float_type
,
{
3
}};
auto
x
=
p
.
add_parameter
(
"x"
,
{
migraph
::
shape
::
float_type
,
{
2
,
4
,
8
}});
auto
y
=
p
.
add_parameter
(
"y"
,
{
migraph
::
shape
::
float_type
,
{
4
}});
auto
by
=
p
.
add_instruction
(
migraph
::
broadcast
{
1
},
x
,
y
);
p
.
add_instruction
(
migraph
::
add
{},
x
,
by
);
return
p
;
}
};
struct
test_conv
{
migraph
::
program
create_program
()
const
{
migraph
::
program
p
;
auto
input
=
p
.
add_parameter
(
"x"
,
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
4
,
3
,
3
,
3
}});
auto
weights
=
p
.
add_parameter
(
"w"
,
migraph
::
shape
{
migraph
::
shape
::
float_type
,
{
4
,
3
,
3
,
3
}});
p
.
add_instruction
(
migraph
::
convolution
{},
input
,
weights
);
return
p
;
}
};
struct
test_conv_relu
struct
test_conv_relu
{
{
migraph
::
program
create_program
()
const
migraph
::
program
create_program
()
const
...
@@ -414,10 +452,50 @@ struct test_conv_bn_relu_pooling
...
@@ -414,10 +452,50 @@ struct test_conv_bn_relu_pooling
}
}
};
};
struct
test_conv_bn_relu_pooling2
{
static
migraph
::
instruction_ref
add_bn
(
migraph
::
program
&
p
,
migraph
::
instruction_ref
x
,
std
::
size_t
channels
)
{
migraph
::
shape
vars
{
migraph
::
shape
::
float_type
,
{
channels
}};
auto
scale
=
p
.
add_literal
(
migraph
::
abs
(
migraph
::
generate_literal
(
vars
,
1
+
channels
)));
auto
bias
=
p
.
add_literal
(
migraph
::
abs
(
migraph
::
generate_literal
(
vars
,
2
+
channels
)));
auto
mean
=
p
.
add_literal
(
migraph
::
abs
(
migraph
::
generate_literal
(
vars
,
3
+
channels
)));
auto
variance
=
p
.
add_literal
(
migraph
::
abs
(
migraph
::
generate_literal
(
vars
,
4
+
channels
)));
return
p
.
add_instruction
(
migraph
::
batch_norm_inference
{},
x
,
scale
,
bias
,
mean
,
variance
);
}
migraph
::
program
create_program
()
const
{
migraph
::
program
p
;
migraph
::
shape
xs1
{
migraph
::
shape
::
float_type
,
{
1
,
512
,
7
,
7
}};
migraph
::
shape
xs2
{
migraph
::
shape
::
float_type
,
{
1
,
1024
,
14
,
14
}};
migraph
::
shape
ws1
{
migraph
::
shape
::
float_type
,
{
2048
,
512
,
1
,
1
}};
migraph
::
shape
ws2
{
migraph
::
shape
::
float_type
,
{
2048
,
1024
,
1
,
1
}};
auto
x1
=
p
.
add_parameter
(
"x1"
,
xs1
);
auto
w1
=
p
.
add_parameter
(
"w1"
,
ws1
);
auto
conv1
=
p
.
add_instruction
(
migraph
::
convolution
{{
0
,
0
},
{
1
,
1
},
{
1
,
1
}},
x1
,
w1
);
auto
bn1
=
add_bn
(
p
,
conv1
,
2048
);
auto
x2
=
p
.
add_parameter
(
"x2"
,
xs2
);
auto
w2
=
p
.
add_parameter
(
"w2"
,
ws2
);
auto
conv2
=
p
.
add_instruction
(
migraph
::
convolution
{{
0
,
0
},
{
2
,
2
},
{
1
,
1
}},
x2
,
w2
);
auto
bn2
=
add_bn
(
p
,
conv2
,
2048
);
auto
add
=
p
.
add_instruction
(
migraph
::
add
{},
bn1
,
bn2
);
auto
relu
=
p
.
add_instruction
(
migraph
::
activation
{
"relu"
},
add
);
p
.
add_instruction
(
migraph
::
pooling
{
"average"
,
{
1
,
1
},
{
2
,
2
},
{
3
,
3
}},
relu
);
return
p
;
}
};
int
main
()
int
main
()
{
{
verify_program
<
test_add
>
();
verify_program
<
test_add
>
();
verify_program
<
test_add_broadcast
>
();
verify_program
<
test_add_broadcast
>
();
verify_program
<
test_add_broadcast2
>
();
verify_program
<
test_add_broadcast3
>
();
verify_program
<
test_add_broadcast4
>
();
verify_program
<
test_add_broadcast5
>
();
verify_program
<
test_conv
>
();
verify_program
<
test_conv_relu
>
();
verify_program
<
test_conv_relu
>
();
verify_program
<
test_add_relu
>
();
verify_program
<
test_add_relu
>
();
verify_program
<
test_conv_pooling
>
();
verify_program
<
test_conv_pooling
>
();
...
@@ -431,4 +509,5 @@ int main()
...
@@ -431,4 +509,5 @@ int main()
verify_program
<
test_batchnorm_inference
>
();
verify_program
<
test_batchnorm_inference
>
();
verify_program
<
test_batchnorm_inference_2
>
();
verify_program
<
test_batchnorm_inference_2
>
();
verify_program
<
test_conv_bn_relu_pooling
>
();
verify_program
<
test_conv_bn_relu_pooling
>
();
verify_program
<
test_conv_bn_relu_pooling2
>
();
}
}
test/operation.cpp
View file @
d9170e2d
...
@@ -43,7 +43,9 @@ void operation_copy_test()
...
@@ -43,7 +43,9 @@ void operation_copy_test()
simple_operation
s
{};
simple_operation
s
{};
migraph
::
operation
op1
=
s
;
// NOLINT
migraph
::
operation
op1
=
s
;
// NOLINT
migraph
::
operation
op2
=
op1
;
// NOLINT
migraph
::
operation
op2
=
op1
;
// NOLINT
// cppcheck-suppress duplicateExpression
EXPECT
(
s
.
name
()
==
op1
.
name
());
EXPECT
(
s
.
name
()
==
op1
.
name
());
// cppcheck-suppress duplicateExpression
EXPECT
(
op2
.
name
()
==
op1
.
name
());
EXPECT
(
op2
.
name
()
==
op1
.
name
());
}
}
...
...
test/shape_test.cpp
View file @
d9170e2d
...
@@ -97,6 +97,72 @@ void test_shape4()
...
@@ -97,6 +97,72 @@ void test_shape4()
EXPECT
(
s
.
index
(
s
.
elements
()
-
1
)
==
s
.
elements
()
-
1
);
EXPECT
(
s
.
index
(
s
.
elements
()
-
1
)
==
s
.
elements
()
-
1
);
}
}
void
test_shape42
()
{
migraph
::
shape
s
{
migraph
::
shape
::
float_type
,
{
100
,
32
,
8
,
8
},
{
2048
,
64
,
8
,
1
}};
EXPECT
(
s
.
standard
());
EXPECT
(
s
.
packed
());
EXPECT
(
not
s
.
transposed
());
EXPECT
(
not
s
.
broadcasted
());
EXPECT
(
s
.
type
()
==
migraph
::
shape
::
float_type
);
EXPECT
(
s
.
lens
()[
0
]
==
100
);
EXPECT
(
s
.
lens
()[
1
]
==
32
);
EXPECT
(
s
.
lens
()[
2
]
==
8
);
EXPECT
(
s
.
lens
()[
3
]
==
8
);
EXPECT
(
s
.
strides
()[
0
]
==
s
.
lens
()[
1
]
*
s
.
strides
()[
1
]);
EXPECT
(
s
.
strides
()[
1
]
==
s
.
lens
()[
2
]
*
s
.
strides
()[
2
]);
EXPECT
(
s
.
strides
()[
2
]
==
s
.
lens
()[
3
]
*
s
.
strides
()[
3
]);
EXPECT
(
s
.
strides
()[
3
]
==
1
);
EXPECT
(
s
.
elements
()
==
100
*
32
*
8
*
8
);
EXPECT
(
s
.
bytes
()
==
100
*
32
*
8
*
8
*
sizeof
(
float
));
EXPECT
(
s
.
index
({
0
,
0
,
0
,
0
})
==
0
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
1
})
==
1
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
0
})
==
s
.
index
(
0
));
EXPECT
(
s
.
index
({
0
,
0
,
0
,
1
})
==
s
.
index
(
1
));
EXPECT
(
s
.
index
({
0
,
0
,
1
,
0
})
==
s
.
index
(
8
));
EXPECT
(
s
.
index
({
0
,
1
,
0
,
0
})
==
s
.
index
(
8
*
8
));
EXPECT
(
s
.
index
({
1
,
0
,
0
,
0
})
==
s
.
index
(
8
*
8
*
32
));
EXPECT
(
s
.
index
(
0
)
==
0
);
EXPECT
(
s
.
index
(
1
)
==
1
);
EXPECT
(
s
.
index
(
8
)
==
8
);
EXPECT
(
s
.
index
(
8
*
8
)
==
8
*
8
);
EXPECT
(
s
.
index
(
8
*
8
*
32
)
==
8
*
8
*
32
);
EXPECT
(
s
.
index
(
s
.
elements
()
-
1
)
==
s
.
elements
()
-
1
);
}
void
test_shape4_transposed
()
{
migraph
::
shape
s
{
migraph
::
shape
::
float_type
,
{
32
,
100
,
8
,
8
},
{
64
,
2048
,
8
,
1
}};
EXPECT
(
s
.
transposed
());
EXPECT
(
s
.
packed
());
EXPECT
(
not
s
.
standard
());
EXPECT
(
not
s
.
broadcasted
());
EXPECT
(
s
.
type
()
==
migraph
::
shape
::
float_type
);
EXPECT
(
s
.
lens
()[
0
]
==
32
);
EXPECT
(
s
.
lens
()[
1
]
==
100
);
EXPECT
(
s
.
lens
()[
2
]
==
8
);
EXPECT
(
s
.
lens
()[
3
]
==
8
);
EXPECT
(
s
.
strides
()[
0
]
==
64
);
EXPECT
(
s
.
strides
()[
1
]
==
2048
);
EXPECT
(
s
.
strides
()[
2
]
==
8
);
EXPECT
(
s
.
strides
()[
3
]
==
1
);
EXPECT
(
s
.
elements
()
==
100
*
32
*
8
*
8
);
EXPECT
(
s
.
bytes
()
==
100
*
32
*
8
*
8
*
sizeof
(
float
));
EXPECT
(
s
.
index
({
0
,
0
,
0
,
0
})
==
0
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
1
})
==
1
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
0
})
==
s
.
index
(
0
));
EXPECT
(
s
.
index
({
0
,
0
,
0
,
1
})
==
s
.
index
(
1
));
EXPECT
(
s
.
index
({
0
,
0
,
1
,
0
})
==
s
.
index
(
8
));
EXPECT
(
s
.
index
({
0
,
1
,
0
,
0
})
==
s
.
index
(
8
*
8
));
EXPECT
(
s
.
index
({
1
,
0
,
0
,
0
})
==
s
.
index
(
8
*
8
*
100
));
EXPECT
(
s
.
index
(
0
)
==
0
);
EXPECT
(
s
.
index
(
1
)
==
1
);
EXPECT
(
s
.
index
(
8
)
==
8
);
EXPECT
(
s
.
index
(
8
*
8
)
==
2048
);
EXPECT
(
s
.
index
(
8
*
8
*
100
)
==
64
);
EXPECT
(
s
.
index
(
s
.
elements
()
-
1
)
==
s
.
elements
()
-
1
);
}
void
test_shape4_nonpacked
()
void
test_shape4_nonpacked
()
{
{
std
::
vector
<
std
::
size_t
>
lens
=
{
100
,
32
,
8
,
8
};
std
::
vector
<
std
::
size_t
>
lens
=
{
100
,
32
,
8
,
8
};
...
@@ -134,11 +200,10 @@ void test_shape4_nonpacked()
...
@@ -134,11 +200,10 @@ void test_shape4_nonpacked()
EXPECT
(
s
.
index
(
1
)
==
1
);
EXPECT
(
s
.
index
(
1
)
==
1
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
0
})
==
0
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
0
})
==
0
);
EXPECT
(
s
.
index
({
0
,
0
,
0
,
1
})
==
s
.
index
(
1
));
EXPECT
(
s
.
index
({
0
,
0
,
0
,
1
})
==
s
.
index
(
1
));
// TODO: Fix these tests
EXPECT
(
s
.
index
({
0
,
0
,
1
,
0
})
==
s
.
index
(
8
));
// EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
EXPECT
(
s
.
index
({
0
,
1
,
0
,
0
})
==
s
.
index
(
8
*
8
));
// EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
EXPECT
(
s
.
index
({
1
,
0
,
0
,
0
})
==
s
.
index
(
8
*
8
*
32
));
// EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
EXPECT
(
s
.
index
(
s
.
elements
()
-
1
)
==
469273
);
// EXPECT(s.index(s.elements() - 1) == 469273);
}
}
int
main
()
int
main
()
...
@@ -151,5 +216,7 @@ int main()
...
@@ -151,5 +216,7 @@ int main()
test_shape_broadcasted
();
test_shape_broadcasted
();
test_shape_default_copy
();
test_shape_default_copy
();
test_shape4
();
test_shape4
();
test_shape42
();
test_shape4_transposed
();
test_shape4_nonpacked
();
test_shape4_nonpacked
();
}
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment