gaoqiong / MIGraphX · Commits · d9170e2d

Commit d9170e2d, authored Sep 12, 2018 by Paul

    Merge branch 'master' into im2col_cpu

Parents: 674ea92d, 9fee0fe4

Showing 18 changed files with 624 additions and 98 deletions (+624 -98)
src/program.cpp                                               +1    -0
src/shape.cpp                                                 +14   -9
src/targets/gpu/CMakeLists.txt                                +1    -1
src/targets/gpu/device/add.cpp                                +15   -0
src/targets/gpu/device/add_relu.cpp                           +2    -3
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp  +10   -2
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp    +205  -23
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp  +6    -6
src/targets/gpu/fuse_ops.cpp                                  +5    -2
src/targets/gpu/include/migraph/gpu/device/add.hpp            +17   -0
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp       +1    -1
src/targets/gpu/lowering.cpp                                  +21   -3
src/targets/gpu/target.cpp                                    +5    -2
test/eliminate_allocation_test.cpp                            +109  -0
test/gpu/literal.cpp                                          +18   -0
test/gpu/miopen.cpp                                           +120  -41
test/operation.cpp                                            +2    -0
test/shape_test.cpp                                           +72   -5
src/program.cpp

@@ -179,6 +179,7 @@ instruction_ref program::add_outline(const shape& s)
 instruction_ref program::add_parameter(std::string name, shape s)
 {
+    assert(get_parameter_shape(name) == shape{});
     impl->instructions.push_front({builtin::param{std::move(name)}, std::move(s), {}});
     return impl->instructions.begin();
 }
src/shape.cpp

@@ -116,15 +116,20 @@ std::size_t shape::index(std::size_t i) const
     if(this->standard())
         return i;
     else
-        return std::inner_product(this->lens().begin(),
-                                  this->lens().end(),
-                                  this->strides().begin(),
-                                  std::size_t{0},
-                                  std::plus<std::size_t>{},
-                                  [&](std::size_t len, std::size_t stride) {
-                                      assert(stride > 0 and len > 0);
-                                      return ((i / stride) % len) * stride;
-                                  });
+    {
+        std::size_t s      = 1;
+        std::size_t result = 0;
+        for(std::size_t j = 0; j < this->lens().size(); j++)
+        {
+            const std::size_t k      = this->lens().size() - j - 1;
+            const std::size_t stride = this->strides()[k];
+            const std::size_t len    = this->lens()[k];
+            const std::size_t idx    = (i % (s * len)) / s;
+            result += stride * idx;
+            s *= len;
+        }
+        return result;
+    }
 }

 bool shape::packed() const { return this->elements() == this->element_space(); }
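Note: the rewritten shape::index walks dimensions innermost-first with a running block size s, recovering each logical coordinate from the element number and then applying that dimension's stride; the old (i / stride) % len arithmetic decomposed i as if it were already a memory offset, which misreads transposed layouts. A standalone sketch of the same arithmetic (index_of is a hypothetical helper written only for illustration, not MIGraphX API):

#include <cassert>
#include <cstddef>
#include <vector>

std::size_t index_of(const std::vector<std::size_t>& lens,
                     const std::vector<std::size_t>& strides,
                     std::size_t i)
{
    std::size_t s      = 1;
    std::size_t result = 0;
    for(std::size_t j = 0; j < lens.size(); j++)
    {
        const std::size_t k   = lens.size() - j - 1;     // innermost dimension first
        const std::size_t idx = (i % (s * lens[k])) / s; // coordinate along dimension k
        result += strides[k] * idx;
        s *= lens[k];
    }
    return result;
}

int main()
{
    // Transposed 2x3 matrix stored column-major (strides {1, 2}): logical
    // element 1 (row 0, col 1) lives at memory offset 2, not 1.
    assert(index_of({2, 3}, {1, 2}, 1) == 2);
}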
src/targets/gpu/CMakeLists.txt

@@ -11,6 +11,7 @@ if(NOT TARGET MIOpen)
 endif()

 add_library(migraph_device
+    device/add.cpp
     device/add_relu.cpp
     device/contiguous.cpp
 )
@@ -20,7 +21,6 @@ target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRE
 target_include_directories(migraph_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)

 add_library(migraph_gpu
-    eliminate_allocation.cpp
     eliminate_workspace.cpp
     fuse_ops.cpp
     hip.cpp
src/targets/gpu/device/add.cpp (new file, mode 100644)

#include <migraph/gpu/device/add.hpp>
#include <migraph/gpu/device/nary.hpp>

namespace migraph {
namespace gpu {
namespace device {

void add(const argument& result, const argument& arg1, const argument& arg2)
{
    nary(result, arg1, arg2)([](auto x, auto y) { return x + y; });
}

} // namespace device
} // namespace gpu
} // namespace migraph
src/targets/gpu/device/add_relu.cpp

@@ -5,10 +5,9 @@ namespace migraph {
 namespace gpu {
 namespace device {

-void add_relu(argument result, argument arg1, argument arg2)
+void add_relu(const argument& result, const argument& arg1, const argument& arg2)
 {
-    nary_standard(std::move(result), std::move(arg1), std::move(arg2))(
-        [](auto x, auto y) { return std::max<decltype(x + y)>(0, x + y); });
+    nary(result, arg1, arg2)([](auto x, auto y) { return max(0, x + y); });
 }

 } // namespace device
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp

@@ -33,10 +33,10 @@ inline auto launch(std::size_t global, std::size_t local)
     };
 }

-inline auto gs_launch(std::size_t n, std::size_t local = 512)
+inline auto gs_launch(std::size_t n, std::size_t local = 1024)
 {
     std::size_t groups  = 1 + n / local;
-    std::size_t nglobal = std::min<std::size_t>(512, groups) * local;
+    std::size_t nglobal = std::min<std::size_t>(256, groups) * local;

     return [=](auto f) {
         launch(nglobal, local)([=](auto idx) {
@@ -48,6 +48,14 @@ inline auto gs_launch(std::size_t n, std::size_t local = 512)
     };
 }

+// Workaround hcc's broken tile_static macro
+#ifdef tile_static
+#undef tile_static
+#define MIGRAPH_DEVICE_SHARED __attribute__((tile_static))
+#else
+#define MIGRAPH_DEVICE_SHARED __shared__
+#endif
+
 } // namespace device
 } // namespace gpu
 } // namespace migraph
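Note: this change doubles the default workgroup size to 1024 and halves the group cap to 256, so the maximum global size stays at 256 * 1024 work-items feeding a grid-stride loop. A host-side sketch of the sizing arithmetic, assuming (as in the kernels that follow) that each work-item strides by nglobal:

#include <algorithm>
#include <cassert>
#include <cstddef>

int main()
{
    std::size_t n       = 1 << 20;                               // elements to process
    std::size_t local   = 1024;                                  // workgroup size
    std::size_t groups  = 1 + n / local;                         // enough groups to cover n
    std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
    // 256 * 1024 work-items cover all 2^20 elements; larger n just means
    // more iterations of the grid-stride loop per work-item.
    assert(nglobal == 256 * 1024);
}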
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp

@@ -10,16 +10,25 @@ namespace migraph {
 namespace gpu {
 namespace device {

-template <class... Arguments>
-auto nary(argument result, Arguments... args)
+template <class T>
+using vec4 = T __attribute__((ext_vector_type(4)));
+
+template <class T>
+__device__ __host__ vec4<T>* as_vec4(T* x)
 {
-    return [=](auto f) {
-        if(all_of({args...}, [](const shape& s) { return s.standard(); }))
-            nary_standard(result, args...)(f);
-        else
-            nary_nonstandard(result, args...)(f);
-    };
+    return reinterpret_cast<vec4<T>*>(x);
+}
+
+template <class T>
+__device__ __host__ T* as_pointer(vec4<T>* x)
+{
+    return reinterpret_cast<T*>(x);
+}
+
+template <class... Ts>
+auto pack_vec4(Ts... xs)
+{
+    return [=](auto f, std::size_t n) { return f(as_vec4(xs)[n]...); };
 }

 template <class F, class... Arguments>
@@ -28,14 +37,12 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
     const auto& output_shape = result.get_shape();
     visit_all(result, args...)([&](auto output, auto... inputs) {
         visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
-            auto data = make_sequence(
-                std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape().lens(),
-                                                           inputs.get_shape().strides()},
-                               inputs.data())...);
-            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
+            auto data = pack(
+                std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()}, inputs.data())...);
+            hip_tensor_descriptor<ndim> out_desc(output_shape);
             auto* outp = output.data();
             gs_launch(output_shape.elements())([=](auto i) {
-                data([&](auto... ps) {
+                data([&](auto&&... ps) {
                     auto outidx = out_desc.multi(i);
                     outp[i]     = f(ps.second[ps.first.linear(outidx)]...);
                 });
@@ -44,24 +51,199 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
     });
 }

+template <class F>
+void binary_broadcast_vec_impl(
+    F f, const argument& result, const argument& arg1, const argument& arg2)
+{
+    const auto& output_shape = result.get_shape();
+    const auto& b_shape      = arg2.get_shape();
+    auto bdim =
+        std::distance(b_shape.strides().begin(),
+                      std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
+                          return x != 0;
+                      }));
+    auto bdim_len         = output_shape.lens()[bdim];
+    auto bdim_stride      = output_shape.strides()[bdim];
+    auto bdim_next_stride = bdim_stride * bdim_len;
+
+    visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
+        using type = std::remove_cv_t<typename decltype(output)::value_type>;
+        auto* xp   = as_vec4(input1.data());
+        auto* yp   = as_vec4(input2.data());
+        auto* outp = as_vec4(output.data());
+
+        const std::size_t vec_size     = 4;
+        const std::size_t nlocal       = 1024;
+        const std::size_t nglobal      = 256 * nlocal;
+        const std::size_t n            = output.size() / vec_size;
+        const std::size_t bdim_vec_len = bdim_len / vec_size;
+
+        launch(nglobal, nlocal)([=](auto idx) __device__ {
+            MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
+            // Load bias into LDS
+            for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
+            {
+                buffer[i] = yp[i];
+            }
+            __syncthreads();
+            auto* bp = as_pointer(buffer);
+            // Process the data
+            for(size_t i = idx.global; i < n; i += nglobal)
+            {
+                auto bidx      = ((i * vec_size) % bdim_next_stride) / bdim_stride;
+                auto b         = bp[bidx];
+                vec4<type> x   = xp[i];
+                vec4<type> out = outp[i];
+                for(std::size_t j = 0; j < vec_size; j++)
+                {
+                    out[j] = f(x[j], b);
+                }
+                outp[i] = out;
+            }
+        });
+    });
+}
+
+template <class F>
+void binary_broadcast_impl(F f, const argument& result, const argument& arg1, const argument& arg2)
+{
+    const auto& output_shape = result.get_shape();
+    const auto& b_shape      = arg2.get_shape();
+    auto bdim =
+        std::distance(b_shape.strides().begin(),
+                      std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
+                          return x != 0;
+                      }));
+    auto bdim_len         = output_shape.lens()[bdim];
+    auto bdim_stride      = output_shape.strides()[bdim];
+    auto bdim_next_stride = bdim_stride * bdim_len;
+
+    visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
+        using type = std::remove_cv_t<typename decltype(output)::value_type>;
+        auto* xp   = input1.data();
+        auto* yp   = input2.data();
+        auto* outp = output.data();
+
+        const std::size_t nlocal  = 1024;
+        const std::size_t nglobal = 256 * nlocal;
+        const std::size_t n       = output.size();
+
+        launch(nglobal, nlocal)([=](auto idx) __device__ {
+            MIGRAPH_DEVICE_SHARED type buffer[2048];
+            // Load bias into LDS
+            for(size_t i = idx.local; i < bdim_len; i += nlocal)
+            {
+                buffer[i] = yp[i];
+            }
+            __syncthreads();
+            // Process the data
+            for(size_t i = idx.global; i < n; i += nglobal)
+            {
+                auto bidx = (i % bdim_next_stride) / bdim_stride;
+                auto b    = buffer[bidx];
+                type x    = xp[i];
+                outp[i]   = f(x, b);
+            }
+        });
+    });
+}
+
+template <class F, class... Arguments>
+void nary_standard_vec_impl(F f, argument result, Arguments... args)
+{
+    // assert(x.get_shape().elements() == y.get_shape().elements());
+    const auto& output_shape = result.get_shape();
+    visit_all(result, args...)([&](auto output, auto... inputs) {
+        using type                 = std::remove_cv_t<typename decltype(output)::value_type>;
+        const std::size_t vec_size = 4;
+        auto data                  = pack_vec4(inputs.data()...);
+        auto* outp                 = as_vec4(output.data());
+        gs_launch(output_shape.elements() / vec_size)([=](auto i) {
+            vec4<type> out = outp[i];
+            data(
+                [&](auto... xs) {
+                    for(std::size_t j = 0; j < vec_size; j++)
+                    {
+                        out[j] = f(xs[j]...);
+                    }
+                },
+                i);
+            outp[i] = out;
+        });
+    });
+}
+
+template <class F, class... Arguments>
+void nary_standard_impl(F f, argument result, Arguments... args)
+{
+    // assert(x.get_shape().elements() == y.get_shape().elements());
+    const auto& output_shape = result.get_shape();
+    visit_all(result, args...)([&](auto output, auto... inputs) {
+        auto data  = pack(inputs.data()...);
+        auto* outp = output.data();
+        gs_launch(output_shape.elements())(
+            [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
+    });
+}
+
+template <class F, class... Arguments>
+void nary_impl(F f, argument result, Arguments... args)
+{
+    bool standard = all_of({args.get_shape()...}, [](const shape& s) { return s.standard(); });
+    bool packed   = all_of({args.get_shape()...}, [](const shape& s) { return s.packed(); });
+    bool same_shapes =
+        all_of({args.get_shape()...}, [&](const shape& s) { return s == result.get_shape(); });
+    if(standard or (packed and same_shapes))
+        nary_standard_impl(f, result, args...);
+    else
+        nary_nonstandard_impl(f, result, args...);
+}
+
 template <class... Arguments>
 auto nary_nonstandard(argument result, Arguments... args)
 {
-    return [=](auto f) { return nary_nonstandard_impl(f, result, args...); };
+    return [=](auto f) { nary_nonstandard_impl(f, result, args...); };
 }

 template <class... Arguments>
 auto nary_standard(argument result, Arguments... args)
 {
-    return [=](auto f) {
-        // assert(x.get_shape().elements() == y.get_shape().elements());
-        const auto& output_shape = result.get_shape();
-        visit_all(result, args...)([&](auto output, auto... inputs) {
-            auto data  = make_sequence(inputs.data()...);
-            auto* outp = output.data();
-            gs_launch(output_shape.elements())(
-                [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
-        });
-    };
+    return [=](auto f) { nary_standard_impl(f, result, args...); };
 }

+template <class... Arguments>
+auto nary(argument result, Arguments... args)
+{
+    return [=](auto f) { nary_impl(f, result, args...); };
+}
+
+inline auto nary(const argument& result, const argument& arg1, const argument& arg2)
+{
+    return [=](auto f) {
+        // TODO: Check result and arg1 shape is the same
+        if(arg1.get_shape().standard() and arg2.get_shape().broadcasted())
+        {
+            auto not_zero       = [](auto x) { return x != 0; };
+            const auto& strides = arg2.get_shape().strides();
+            auto b_it           = std::find_if(strides.begin(), strides.end(), not_zero);
+            auto b_idx          = std::distance(strides.begin(), b_it);
+            auto b_len          = result.get_shape().lens()[b_idx];
+            auto b_stride       = result.get_shape().strides()[b_idx];
+            assert(arg2.get_shape().lens()[b_idx] == b_len);
+            if(b_len <= 2048 and std::none_of(std::next(b_it), strides.end(), not_zero))
+            {
+                const bool divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
+                                            (arg1.get_shape().elements() % 4 == 0);
+                if(divisible_by_4)
+                    binary_broadcast_vec_impl(f, result, arg1, arg2);
+                else
+                    binary_broadcast_impl(f, result, arg1, arg2);
+                return;
+            }
+        }
+        nary_impl(f, result, arg1, arg2);
+    };
+}
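Note: nary now dispatches between a standard elementwise path and two binary-broadcast fast paths that stage the bias vector in LDS; the bias element for output index i is recovered purely from the broadcast axis's length and stride via (i % (bdim_stride * bdim_len)) / bdim_stride. A CPU-side sketch of that index arithmetic (values chosen for illustration):

#include <cassert>
#include <cstddef>

int main()
{
    // Example: output lens {2, 3, 4} with standard strides {12, 4, 1},
    // bias broadcast along axis 1 -> bdim_len = 3, bdim_stride = 4.
    std::size_t bdim_len         = 3;
    std::size_t bdim_stride      = 4;
    std::size_t bdim_next_stride = bdim_stride * bdim_len; // 12
    assert(((0 % bdim_next_stride) / bdim_stride) == 0);   // i = 0  reads bias[0]
    assert(((5 % bdim_next_stride) / bdim_stride) == 1);   // i = 5  reads bias[1]
    assert(((23 % bdim_next_stride) / bdim_stride) == 2);  // i = 23 reads bias[2]
}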
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp

@@ -2,6 +2,7 @@
 #define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP

 #include <hip/hip_runtime.h>
+#include <migraph/functional.hpp>

 namespace migraph {
 namespace gpu {
@@ -53,14 +54,13 @@ template <size_t NDim>
 struct hip_tensor_descriptor
 {
     __device__ __host__ hip_tensor_descriptor() = default;

-    template <typename T, typename V>
-    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
+    hip_tensor_descriptor(const shape& s)
     {
-        for(size_t i = 0; i < NDim; i++)
-            lens[i] = lens_ext[i];
-        for(size_t i = 0; i < NDim; i++)
-            strides[i] = strides_ext[i];
+        std::copy(s.lens().begin(), s.lens().end(), lens);
+        std::copy(s.strides().begin(), s.strides().end(), strides);
     }

     __device__ __host__ hip_index<NDim> multi(size_t idx) const
     {
         hip_index<NDim> result{};
src/targets/gpu/fuse_ops.cpp

@@ -12,7 +12,7 @@ struct hip_add_relu
     std::string name() const { return "hip::add_relu"; }
     shape compute_shape(const std::vector<shape>& inputs) const
     {
-        check_shapes{inputs}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
         return inputs.front();
     }
     argument compute(context&, const shape&, const std::vector<argument>& args) const
@@ -31,7 +31,10 @@ void fuse_ops::apply(program& p) const
             auto add_ins = ins->arguments.front();
             if(add_ins->op.name() != "gpu::add")
                 continue;
-            p.replace_instruction(ins, hip_add_relu{}, add_ins->arguments);
+            auto args = add_ins->arguments;
+            // Use the allocation from the relu operator
+            args.back() = ins->arguments.back();
+            p.replace_instruction(ins, hip_add_relu{}, args);
         }
     }
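Note: the fix here is that the fused hip_add_relu must write into the relu's output allocation, which downstream instructions reference, rather than the add's now-dead buffer. A toy model of the argument rewrite, with illustrative names standing in for real MIGraphX instruction references:

#include <cassert>
#include <string>
#include <vector>

int main()
{
    // The add carries its inputs plus its own output allocation; the relu
    // carries the add's result plus its own allocation.
    std::vector<std::string> add_args  = {"x", "y", "add_buffer"};
    std::vector<std::string> relu_args = {"add_result", "relu_buffer"};

    auto args   = add_args;           // start from the add's arguments
    args.back() = relu_args.back();   // ...but reuse the relu's allocation
    assert((args == std::vector<std::string>{"x", "y", "relu_buffer"}));
}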
src/targets/gpu/include/migraph/gpu/device/add.hpp (new file, mode 100644)

#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP

#include <migraph/argument.hpp>

namespace migraph {
namespace gpu {
namespace device {

void add(const argument& result, const argument& arg1, const argument& arg2);

} // namespace device
} // namespace gpu
} // namespace migraph

#endif
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp

@@ -8,7 +8,7 @@ namespace migraph {
 namespace gpu {
 namespace device {

-void add_relu(argument result, argument arg1, argument arg2);
+void add_relu(const argument& result, const argument& arg1, const argument& arg2);

 } // namespace device
 } // namespace gpu
src/targets/gpu/lowering.cpp

@@ -9,6 +9,7 @@
 #include <migraph/gpu/hip.hpp>
 #include <migraph/dfor.hpp>
 #include <migraph/gpu/device/contiguous.hpp>
+#include <migraph/gpu/device/add.hpp>
 #include <migraph/iterator_for.hpp>
 #include <migraph/gpu/rocblas.hpp>
 #include <migraph/gpu/context.hpp>
@@ -168,6 +169,23 @@ struct miopen_pooling
     }
 };

+struct hip_add
+{
+    std::string name() const { return "gpu::add"; }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        // check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
+        return inputs.at(0);
+    }
+
+    argument compute(context&, const shape&, const std::vector<argument>& args) const
+    {
+        device::add(args[2], args[0], args[1]);
+        return args[2];
+    }
+};
+
 struct miopen_add
 {
     std::string name() const { return "gpu::add"; }
@@ -202,7 +220,7 @@ struct miopen_add
 struct miopen_gemm
 {
     gemm op;
-    std::string name() const { return "gpu::convolution"; }
+    std::string name() const { return "gpu::gemm"; }
     shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(3);
@@ -337,7 +355,7 @@ struct miopen_apply
     instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
     {
-        if(ins == --prog->end())
+        if(ins == --prog->end() and tag.empty())
         {
             return prog->add_parameter("output", s);
         }
@@ -390,7 +408,7 @@ struct miopen_apply
     {
         auto output = insert_allocation(ins, ins->result);
         return prog->replace_instruction(
-            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
+            ins, hip_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
     }

     instruction_ref apply_gemm(instruction_ref ins)
src/targets/gpu/target.cpp

@@ -3,13 +3,14 @@
 #include <migraph/gpu/write_literals.hpp>
 #include <migraph/gpu/context.hpp>
 #include <migraph/gpu/eliminate_workspace.hpp>
-#include <migraph/gpu/eliminate_allocation.hpp>
+#include <migraph/eliminate_allocation.hpp>
 #include <migraph/gpu/fuse_ops.hpp>
 #include <migraph/check_context.hpp>
 #include <migraph/auto_contiguous.hpp>
 #include <migraph/dead_code_elimination.hpp>
 #include <migraph/simplify_reshapes.hpp>
 #include <migraph/eliminate_contiguous.hpp>
+#include <migraph/fwd_conv_batchnorm_rewrite.hpp>

 namespace migraph {
 namespace gpu {
@@ -20,6 +21,8 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
     // clang-format off
     return
     {
         dead_code_elimination{},
+        fwd_conv_batchnorm_rewrite{},
+        dead_code_elimination{},
         auto_contiguous{},
         simplify_reshapes{},
@@ -31,7 +34,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
         eliminate_contiguous{},
         dead_code_elimination{},
         write_literals{&ctx},
-        eliminate_allocation{},
+        eliminate_allocation{""},
         check_context<context>{},
         dead_code_elimination{}
     };
test/eliminate_allocation_test.cpp (new file, mode 100644)

#include <migraph/eliminate_allocation.hpp>
#include <migraph/dead_code_elimination.hpp>
#include <migraph/operators.hpp>
#include <basic_ops.hpp>
#include <test.hpp>

struct eliminate_allocation_target
{
    std::size_t align = 32;
    std::string name() const { return "eliminate_allocation"; }
    std::vector<migraph::pass> get_passes(migraph::context&) const
    {
        return {migraph::eliminate_allocation{"allocate", align},
                migraph::dead_code_elimination{}};
    }
    migraph::context get_context() const { return {}; }
};

struct allocate
{
    migraph::shape s{};
    std::string name() const { return "allocate"; }
    migraph::shape compute_shape(const std::vector<migraph::shape>& inputs) const
    {
        migraph::check_shapes{inputs}.has(0);
        return s;
    }
    migraph::argument compute(migraph::context&,
                              const migraph::shape& output_shape,
                              const std::vector<migraph::argument>&) const
    {
        return {output_shape};
    }
};

void basic()
{
    migraph::program p;
    auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {8}}});
    auto p1 = p.add_instruction(pass_op{}, a1);
    auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {40}}});
    auto p2 = p.add_instruction(pass_op{}, a2, p1);
    auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
    p.add_instruction(pass_op{}, a3, p2);
    p.compile(eliminate_allocation_target{});
    EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
    EXPECT(p.get_parameter_shape("memory").bytes() == (8 * 4 + 40 * 4 + 200 * 4));
}

void aligned()
{
    migraph::program p;
    auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {1}}});
    auto p1 = p.add_instruction(pass_op{}, a1);
    auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {2}}});
    auto p2 = p.add_instruction(pass_op{}, a2, p1);
    auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
    p.add_instruction(pass_op{}, a3, p2);
    p.compile(eliminate_allocation_target{});
    EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
    EXPECT(p.get_parameter_shape("memory").bytes() == (32 + 32 + 200 * 4));
}

void unaligned()
{
    migraph::program p;
    auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {1}}});
    auto p1 = p.add_instruction(pass_op{}, a1);
    auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {2}}});
    auto p2 = p.add_instruction(pass_op{}, a2, p1);
    auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
    p.add_instruction(pass_op{}, a3, p2);
    p.compile(eliminate_allocation_target{1});
    EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
    EXPECT(p.get_parameter_shape("memory").bytes() == (1 * 4 + 2 * 4 + 200 * 4));
}

void float_aligned()
{
    migraph::program p;
    auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {1}}});
    auto p1 = p.add_instruction(pass_op{}, a1);
    auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {2}}});
    auto p2 = p.add_instruction(pass_op{}, a2, p1);
    auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
    p.add_instruction(pass_op{}, a3, p2);
    p.compile(eliminate_allocation_target{4});
    EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
    EXPECT(p.get_parameter_shape("memory").bytes() == (1 * 4 + 2 * 4 + 200 * 4));
}

int main()
{
    basic();
    aligned();
    unaligned();
    float_aligned();
}
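Note: the expected byte counts follow if the pass rounds each allocation up to the target's alignment and sums the padded sizes into one "memory" parameter (the 800-byte allocation is already a multiple of 32, so it is unchanged in aligned()). A sketch of that arithmetic, with pad_to as a hypothetical helper rather than MIGraphX API:

#include <cassert>
#include <cstddef>

// Round bytes up to the next multiple of align.
std::size_t pad_to(std::size_t bytes, std::size_t align)
{
    return ((bytes + align - 1) / align) * align;
}

int main()
{
    // aligned(): float allocations of 1, 2, and 200 elements with align = 32
    assert(pad_to(1 * 4, 32) + pad_to(2 * 4, 32) + pad_to(200 * 4, 32) == 32 + 32 + 200 * 4);
    // unaligned(): align = 1 leaves every size unpadded
    assert(pad_to(1 * 4, 1) + pad_to(2 * 4, 1) + pad_to(200 * 4, 1) == 1 * 4 + 2 * 4 + 200 * 4);
}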
test/gpu/literal.cpp (new file, mode 100644)

#include <test.hpp>
#include <basic_ops.hpp>
#include <migraph/program.hpp>
#include <migraph/generate.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/hip.hpp>

void gpu_literal_test()
{
    migraph::program p;
    auto lit = generate_literal(migraph::shape{migraph::shape::float_type, {4, 3, 3, 3}});
    p.add_literal(lit);
    p.compile(migraph::gpu::target{});
    auto result = p.eval({});
    EXPECT(lit == migraph::gpu::from_gpu(result));
}

int main() { gpu_literal_test(); }
test/gpu/miopen.cpp

@@ -8,7 +8,7 @@
 #include <migraph/gpu/hip.hpp>
 #include <migraph/manage_ptr.hpp>
 #include <migraph/type_name.hpp>
-#include <migraph/verify.hpp>
+#include <migraph/verify_args.hpp>

 #include <miopen/miopen.h>
@@ -77,6 +77,12 @@ struct auto_print
 };
 std::array<std::function<void()>, 2> auto_print::handlers = {};

+template <class T>
+auto get_hash(const T& x)
+{
+    return std::hash<T>{}(x);
+}
+
 void compile_check(migraph::program& p, const migraph::target& t)
 {
     auto name = t.name();
@@ -100,7 +106,7 @@ migraph::argument run_cpu()
     migraph::program::parameter_map m;
     for(auto&& x : p.get_parameter_shapes())
     {
-        m[x.first] = migraph::generate_argument(x.second);
+        m[x.first] = migraph::generate_argument(x.second, get_hash(x.first));
     }
     return p.eval(m);
 }
@@ -112,52 +118,15 @@ migraph::argument run_gpu()
     auto p = v.create_program();
     auto_print pp{p, 1};
     compile_check(p, migraph::gpu::target{});
     migraph::program::parameter_map m;
     for(auto&& x : p.get_parameter_shapes())
     {
-        m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
+        m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second, get_hash(x.first)));
     }
+    EXPECT(bool{m.find("output") != m.end()});
     return migraph::gpu::from_gpu(p.eval(m));
 }

-void verify_args(const std::string& name,
-                 const migraph::argument& cpu_arg,
-                 const migraph::argument& gpu_arg)
-{
-    visit_all(cpu_arg, gpu_arg)([&](auto cpu, auto gpu) {
-        if(not migraph::verify_range(cpu, gpu))
-        {
-            // TODO: Check for nans
-            std::cout << "FAILED: " << name << std::endl;
-            // std::cout << cpu << std::endl;
-            // std::cout << gpu << std::endl;
-            if(migraph::range_zero(cpu))
-                std::cout << "Cpu data is all zeros" << std::endl;
-            if(migraph::range_zero(gpu))
-                std::cout << "Gpu data is all zeros" << std::endl;
-
-            auto idx = migraph::mismatch_idx(cpu, gpu, migraph::float_equal);
-            if(idx < migraph::range_distance(cpu))
-            {
-                std::cout << "Mismatch at " << idx << ": " << cpu[idx] << " != " << gpu[idx]
-                          << std::endl;
-            }
-
-            auto cpu_nan_idx = find_idx(cpu, migraph::not_finite);
-            if(cpu_nan_idx >= 0)
-                std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": "
-                          << cpu[cpu_nan_idx] << std::endl;
-
-            auto gpu_nan_idx = find_idx(gpu, migraph::not_finite);
-            if(gpu_nan_idx >= 0)
-                std::cout << "Non finite number found in gpu at " << gpu_nan_idx << ": "
-                          << gpu[gpu_nan_idx] << std::endl;
-        }
-    });
-}
-
 template <class V>
 void verify_program()
 {
@@ -210,6 +179,75 @@ struct test_add_broadcast
     }
 };

+struct test_add_broadcast2
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 3, 4}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {3}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_add_broadcast3
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 4, 5}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {4}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_add_broadcast4
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 3, 5}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {3}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_add_broadcast5
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 4, 8}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {4}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_conv
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        auto input =
+            p.add_parameter("x", migraph::shape{migraph::shape::float_type, {4, 3, 3, 3}});
+        auto weights =
+            p.add_parameter("w", migraph::shape{migraph::shape::float_type, {4, 3, 3, 3}});
+        p.add_instruction(migraph::convolution{}, input, weights);
+        return p;
+    }
+};
+
 struct test_conv_relu
 {
     migraph::program create_program() const
@@ -414,10 +452,50 @@ struct test_conv_bn_relu_pooling
     }
 };

+struct test_conv_bn_relu_pooling2
+{
+    static migraph::instruction_ref
+    add_bn(migraph::program& p, migraph::instruction_ref x, std::size_t channels)
+    {
+        migraph::shape vars{migraph::shape::float_type, {channels}};
+        auto scale    = p.add_literal(migraph::abs(migraph::generate_literal(vars, 1 + channels)));
+        auto bias     = p.add_literal(migraph::abs(migraph::generate_literal(vars, 2 + channels)));
+        auto mean     = p.add_literal(migraph::abs(migraph::generate_literal(vars, 3 + channels)));
+        auto variance = p.add_literal(migraph::abs(migraph::generate_literal(vars, 4 + channels)));
+        return p.add_instruction(migraph::batch_norm_inference{}, x, scale, bias, mean, variance);
+    }
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape xs1{migraph::shape::float_type, {1, 512, 7, 7}};
+        migraph::shape xs2{migraph::shape::float_type, {1, 1024, 14, 14}};
+        migraph::shape ws1{migraph::shape::float_type, {2048, 512, 1, 1}};
+        migraph::shape ws2{migraph::shape::float_type, {2048, 1024, 1, 1}};
+        auto x1    = p.add_parameter("x1", xs1);
+        auto w1    = p.add_parameter("w1", ws1);
+        auto conv1 = p.add_instruction(migraph::convolution{{0, 0}, {1, 1}, {1, 1}}, x1, w1);
+        auto bn1   = add_bn(p, conv1, 2048);
+        auto x2    = p.add_parameter("x2", xs2);
+        auto w2    = p.add_parameter("w2", ws2);
+        auto conv2 = p.add_instruction(migraph::convolution{{0, 0}, {2, 2}, {1, 1}}, x2, w2);
+        auto bn2   = add_bn(p, conv2, 2048);
+        auto add   = p.add_instruction(migraph::add{}, bn1, bn2);
+        auto relu  = p.add_instruction(migraph::activation{"relu"}, add);
+        p.add_instruction(migraph::pooling{"average", {1, 1}, {2, 2}, {3, 3}}, relu);
+        return p;
+    }
+};
+
 int main()
 {
     verify_program<test_add>();
     verify_program<test_add_broadcast>();
+    verify_program<test_add_broadcast2>();
+    verify_program<test_add_broadcast3>();
+    verify_program<test_add_broadcast4>();
+    verify_program<test_add_broadcast5>();
+    verify_program<test_conv>();
     verify_program<test_conv_relu>();
     verify_program<test_add_relu>();
     verify_program<test_conv_pooling>();
@@ -431,4 +509,5 @@ int main()
     verify_program<test_batchnorm_inference>();
     verify_program<test_batchnorm_inference_2>();
     verify_program<test_conv_bn_relu_pooling>();
+    verify_program<test_conv_bn_relu_pooling2>();
 }
test/operation.cpp

@@ -43,7 +43,9 @@ void operation_copy_test()
     simple_operation s{};
     migraph::operation op1 = s;   // NOLINT
     migraph::operation op2 = op1; // NOLINT
+    // cppcheck-suppress duplicateExpression
     EXPECT(s.name() == op1.name());
+    // cppcheck-suppress duplicateExpression
     EXPECT(op2.name() == op1.name());
 }
test/shape_test.cpp

@@ -97,6 +97,72 @@ void test_shape4()
     EXPECT(s.index(s.elements() - 1) == s.elements() - 1);
 }

+void test_shape42()
+{
+    migraph::shape s{migraph::shape::float_type, {100, 32, 8, 8}, {2048, 64, 8, 1}};
+    EXPECT(s.standard());
+    EXPECT(s.packed());
+    EXPECT(not s.transposed());
+    EXPECT(not s.broadcasted());
+    EXPECT(s.type() == migraph::shape::float_type);
+    EXPECT(s.lens()[0] == 100);
+    EXPECT(s.lens()[1] == 32);
+    EXPECT(s.lens()[2] == 8);
+    EXPECT(s.lens()[3] == 8);
+    EXPECT(s.strides()[0] == s.lens()[1] * s.strides()[1]);
+    EXPECT(s.strides()[1] == s.lens()[2] * s.strides()[2]);
+    EXPECT(s.strides()[2] == s.lens()[3] * s.strides()[3]);
+    EXPECT(s.strides()[3] == 1);
+    EXPECT(s.elements() == 100 * 32 * 8 * 8);
+    EXPECT(s.bytes() == 100 * 32 * 8 * 8 * sizeof(float));
+    EXPECT(s.index({0, 0, 0, 0}) == 0);
+    EXPECT(s.index({0, 0, 0, 1}) == 1);
+    EXPECT(s.index({0, 0, 0, 0}) == s.index(0));
+    EXPECT(s.index({0, 0, 0, 1}) == s.index(1));
+    EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
+    EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
+    EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
+    EXPECT(s.index(0) == 0);
+    EXPECT(s.index(1) == 1);
+    EXPECT(s.index(8) == 8);
+    EXPECT(s.index(8 * 8) == 8 * 8);
+    EXPECT(s.index(8 * 8 * 32) == 8 * 8 * 32);
+    EXPECT(s.index(s.elements() - 1) == s.elements() - 1);
+}
+
+void test_shape4_transposed()
+{
+    migraph::shape s{migraph::shape::float_type, {32, 100, 8, 8}, {64, 2048, 8, 1}};
+    EXPECT(s.transposed());
+    EXPECT(s.packed());
+    EXPECT(not s.standard());
+    EXPECT(not s.broadcasted());
+    EXPECT(s.type() == migraph::shape::float_type);
+    EXPECT(s.lens()[0] == 32);
+    EXPECT(s.lens()[1] == 100);
+    EXPECT(s.lens()[2] == 8);
+    EXPECT(s.lens()[3] == 8);
+    EXPECT(s.strides()[0] == 64);
+    EXPECT(s.strides()[1] == 2048);
+    EXPECT(s.strides()[2] == 8);
+    EXPECT(s.strides()[3] == 1);
+    EXPECT(s.elements() == 100 * 32 * 8 * 8);
+    EXPECT(s.bytes() == 100 * 32 * 8 * 8 * sizeof(float));
+    EXPECT(s.index({0, 0, 0, 0}) == 0);
+    EXPECT(s.index({0, 0, 0, 1}) == 1);
+    EXPECT(s.index({0, 0, 0, 0}) == s.index(0));
+    EXPECT(s.index({0, 0, 0, 1}) == s.index(1));
+    EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
+    EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
+    EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 100));
+    EXPECT(s.index(0) == 0);
+    EXPECT(s.index(1) == 1);
+    EXPECT(s.index(8) == 8);
+    EXPECT(s.index(8 * 8) == 2048);
+    EXPECT(s.index(8 * 8 * 100) == 64);
+    EXPECT(s.index(s.elements() - 1) == s.elements() - 1);
+}
+
 void test_shape4_nonpacked()
 {
     std::vector<std::size_t> lens = {100, 32, 8, 8};
@@ -134,11 +200,10 @@ void test_shape4_nonpacked()
     EXPECT(s.index(1) == 1);
     EXPECT(s.index({0, 0, 0, 0}) == 0);
     EXPECT(s.index({0, 0, 0, 1}) == s.index(1));
-    // TODO: Fix these tests
-    // EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
-    // EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
-    // EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
-    // EXPECT(s.index(s.elements() - 1) == 469273);
+    EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
+    EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
+    EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
+    EXPECT(s.index(s.elements() - 1) == 469273);
 }

 int main()
@@ -151,5 +216,7 @@ int main()
     test_shape_broadcasted();
     test_shape_default_copy();
     test_shape4();
+    test_shape42();
+    test_shape4_transposed();
     test_shape4_nonpacked();
 }