gaoqiong / MIGraphX / Commits

Commit 2d827e27, authored Aug 31, 2018 by mei-ye

more coding conventions fix

Parents: 371a0f29, 4f8eb0e2

Showing 9 changed files with 360 additions and 75 deletions (+360 -75)
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp    +205  -23
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp    +6   -6
src/targets/gpu/eliminate_allocation.cpp                        +1   -1
src/targets/gpu/fuse_ops.cpp                                    +1   -1
src/targets/gpu/include/migraph/gpu/device/add.hpp             +17   -0
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp         +1   -1
src/targets/gpu/lowering.cpp                                   +21   -3
src/targets/gpu/target.cpp                                      +3   -0
test/gpu/miopen.cpp                                           +105  -40
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
@@ -10,16 +10,25 @@ namespace migraph {
 namespace gpu {
 namespace device {
 
-template <class... Arguments>
-auto nary(argument result, Arguments... args)
-{
-    return [=](auto f) {
-        if(all_of({args...}, [](const shape& s) { return s.standard(); }))
-            nary_standard(result, args...)(f);
-        else
-            nary_nonstandard(result, args...)(f);
-    };
-}
+template <class T>
+using vec4 = T __attribute__((ext_vector_type(4)));
+
+template <class T>
+__device__ __host__ vec4<T>* as_vec4(T* x)
+{
+    return reinterpret_cast<vec4<T>*>(x);
+}
+
+template <class T>
+__device__ __host__ T* as_pointer(vec4<T>* x)
+{
+    return reinterpret_cast<T*>(x);
+}
+
+template <class... Ts>
+auto pack_vec4(Ts... xs)
+{
+    return [=](auto f, std::size_t n) { return f(as_vec4(xs)[n]...); };
+}
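For context, `vec4` here is Clang's `ext_vector_type` extension, which HIP code can use on both host and device; subscripting and arithmetic are per-lane. A minimal host-only sketch (the `main` is hypothetical, not part of this commit) of what the `as_vec4` reinterpretation buys:

    #include <cstdio>

    template <class T>
    using vec4 = T __attribute__((ext_vector_type(4))); // 4-lane Clang extended vector

    template <class T>
    vec4<T>* as_vec4(T* x) { return reinterpret_cast<vec4<T>*>(x); }

    int main()
    {
        float data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        vec4<float>* v = as_vec4(data); // view 8 floats as 2 groups of 4
        vec4<float> s  = v[0] + v[1];   // per-lane add: {4, 6, 8, 10}
        std::printf("%g %g %g %g\n", s[0], s[1], s[2], s[3]);
    }

This compiles with Clang; the `ext_vector_type` spelling is Clang-specific (GCC uses `vector_size` instead).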
@@ -28,14 +37,12 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
     const auto& output_shape = result.get_shape();
     visit_all(result, args...)([&](auto output, auto... inputs) {
         visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
-            auto data = make_sequence(
-                std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape().lens(),
-                                                           inputs.get_shape().strides()},
-                               inputs.data())...);
-            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
+            auto data = pack(
+                std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()}, inputs.data())...);
+            hip_tensor_descriptor<ndim> out_desc(output_shape);
             auto* outp = output.data();
             gs_launch(output_shape.elements())([=](auto i) {
-                data([&](auto... ps) {
+                data([&](auto&&... ps) {
                     auto outidx = out_desc.multi(i);
                     outp[i]     = f(ps.second[ps.first.linear(outidx)]...);
                 });
@@ -44,24 +51,199 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
     });
 }
 
+template <class F>
+void binary_broadcast_vec_impl(
+    F f, const argument& result, const argument& arg1, const argument& arg2)
+{
+    const auto& output_shape = result.get_shape();
+    const auto& b_shape      = arg2.get_shape();
+    auto bdim =
+        std::distance(b_shape.strides().begin(),
+                      std::find_if(b_shape.strides().begin(),
+                                   b_shape.strides().end(),
+                                   [](auto x) { return x != 0; }));
+    auto bdim_len         = output_shape.lens()[bdim];
+    auto bdim_stride      = output_shape.strides()[bdim];
+    auto bdim_next_stride = bdim_stride * bdim_len;
+
+    visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
+        using type = std::remove_cv_t<typename decltype(output)::value_type>;
+        auto* xp   = as_vec4(input1.data());
+        auto* yp   = as_vec4(input2.data());
+        auto* outp = as_vec4(output.data());
+
+        const std::size_t vec_size     = 4;
+        const std::size_t nlocal       = 1024;
+        const std::size_t nglobal      = 256 * nlocal;
+        const std::size_t n            = output.size() / vec_size;
+        const std::size_t bdim_vec_len = bdim_len / vec_size;
+
+        launch(nglobal, nlocal)([=](auto idx) __device__ {
+            MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
+            // Load bias into LDS
+            for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
+            {
+                buffer[i] = yp[i];
+            }
+            __syncthreads();
+            auto* bp = as_pointer(buffer);
+            // Process the data
+            for(size_t i = idx.global; i < n; i += nglobal)
+            {
+                auto bidx      = ((i * vec_size) % bdim_next_stride) / bdim_stride;
+                auto b         = bp[bidx];
+                vec4<type> x   = xp[i];
+                vec4<type> out = outp[i];
+                for(std::size_t j = 0; j < vec_size; j++)
+                {
+                    out[j] = f(x[j], b);
+                }
+                outp[i] = out;
+            }
+        });
+    });
+}
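A single scalar `b` is applied to all four lanes of each vec4, which is only correct because the `nary` dispatcher below takes this path when `b_len % 4 == 0` and `b_stride % 4 == 0`: the four consecutive scalar indices covered by one vec4 then always fall on the same bias element. A standalone check of that invariant, with hypothetical sizes:

    #include <cassert>
    #include <cstddef>

    int main()
    {
        // e.g. an NCHW bias broadcast over C with lens {2, 8, 4, 4}:
        const std::size_t bdim_len         = 8;  // bias length, divisible by 4
        const std::size_t bdim_stride      = 16; // 4*4, divisible by 4
        const std::size_t bdim_next_stride = bdim_stride * bdim_len;
        const std::size_t elements         = 2 * 8 * 4 * 4;
        for(std::size_t i = 0; i < elements / 4; i++) // one iteration per vec4
        {
            auto bidx = ((i * 4) % bdim_next_stride) / bdim_stride;
            for(std::size_t j = 0; j < 4; j++)        // all four lanes agree
                assert(((i * 4 + j) % bdim_next_stride) / bdim_stride == bidx);
        }
    }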
+template <class F>
+void binary_broadcast_impl(
+    F f, const argument& result, const argument& arg1, const argument& arg2)
+{
+    const auto& output_shape = result.get_shape();
+    const auto& b_shape      = arg2.get_shape();
+    auto bdim =
+        std::distance(b_shape.strides().begin(),
+                      std::find_if(b_shape.strides().begin(),
+                                   b_shape.strides().end(),
+                                   [](auto x) { return x != 0; }));
+    auto bdim_len         = output_shape.lens()[bdim];
+    auto bdim_stride      = output_shape.strides()[bdim];
+    auto bdim_next_stride = bdim_stride * bdim_len;
+
+    visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
+        using type = std::remove_cv_t<typename decltype(output)::value_type>;
+        auto* xp   = input1.data();
+        auto* yp   = input2.data();
+        auto* outp = output.data();
+
+        const std::size_t nlocal  = 1024;
+        const std::size_t nglobal = 256 * nlocal;
+        const std::size_t n       = output.size();
+
+        launch(nglobal, nlocal)([=](auto idx) __device__ {
+            MIGRAPH_DEVICE_SHARED type buffer[2048];
+            // Load bias into LDS
+            for(size_t i = idx.local; i < bdim_len; i += nlocal)
+            {
+                buffer[i] = yp[i];
+            }
+            __syncthreads();
+            // Process the data
+            for(size_t i = idx.global; i < n; i += nglobal)
+            {
+                auto bidx = (i % bdim_next_stride) / bdim_stride;
+                auto b    = buffer[bidx];
+                type x    = xp[i];
+                outp[i]   = f(x, b);
+            }
+        });
+    });
+}
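The `(i % bdim_next_stride) / bdim_stride` expression recovers the broadcast-axis coordinate of a linear index into a standard-layout tensor. Worked through with illustrative numbers (not taken from the commit):

    // Output lens {2, 3, 4, 5}, row-major strides {60, 20, 5, 1},
    // bias broadcast over axis 1: bdim_stride = 20, bdim_next_stride = 60.
    std::size_t i    = 47;             // multi-index {0, 2, 1, 2}
    std::size_t bidx = (47 % 60) / 20; // == 2, the axis-1 coordinate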
+template <class F, class... Arguments>
+void nary_standard_vec_impl(F f, argument result, Arguments... args)
+{
+    // assert(x.get_shape().elements() == y.get_shape().elements());
+    const auto& output_shape = result.get_shape();
+    visit_all(result, args...)([&](auto output, auto... inputs) {
+        using type = std::remove_cv_t<typename decltype(output)::value_type>;
+        const std::size_t vec_size = 4;
+        auto data  = pack_vec4(inputs.data()...);
+        auto* outp = as_vec4(output.data());
+        gs_launch(output_shape.elements() / vec_size)([=](auto i) {
+            vec4<type> out = outp[i];
+            data(
+                [&](auto... xs) {
+                    for(std::size_t j = 0; j < vec_size; j++)
+                    {
+                        out[j] = f(xs[j]...);
+                    }
+                },
+                i);
+            outp[i] = out;
+        });
+    });
+}
+
+template <class F, class... Arguments>
+void nary_standard_impl(F f, argument result, Arguments... args)
+{
+    // assert(x.get_shape().elements() == y.get_shape().elements());
+    const auto& output_shape = result.get_shape();
+    visit_all(result, args...)([&](auto output, auto... inputs) {
+        auto data  = pack(inputs.data()...);
+        auto* outp = output.data();
+        gs_launch(output_shape.elements())(
+            [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
+    });
+}
+
+template <class F, class... Arguments>
+void nary_impl(F f, argument result, Arguments... args)
+{
+    bool standard = all_of({args.get_shape()...}, [](const shape& s) { return s.standard(); });
+    bool packed   = all_of({args.get_shape()...}, [](const shape& s) { return s.packed(); });
+    bool same_shapes =
+        all_of({args.get_shape()...}, [&](const shape& s) { return s == result.get_shape(); });
+    if(standard or (packed and same_shapes))
+        nary_standard_impl(f, result, args...);
+    else
+        nary_nonstandard_impl(f, result, args...);
+}
+
 template <class... Arguments>
 auto nary_nonstandard(argument result, Arguments... args)
 {
-    return [=](auto f) { return nary_nonstandard_impl(f, result, args...); };
+    return [=](auto f) { nary_nonstandard_impl(f, result, args...); };
 }
 
 template <class... Arguments>
 auto nary_standard(argument result, Arguments... args)
 {
     return [=](auto f) { nary_standard_impl(f, result, args...); };
 }
 
+template <class... Arguments>
+auto nary(argument result, Arguments... args)
+{
+    return [=](auto f) { nary_impl(f, result, args...); };
+}
+
 inline auto nary(const argument& result, const argument& arg1, const argument& arg2)
 {
     return [=](auto f) {
-        // assert(x.get_shape().elements() == y.get_shape().elements());
-        const auto& output_shape = result.get_shape();
-        visit_all(result, args...)([&](auto output, auto... inputs) {
-            auto data  = make_sequence(inputs.data()...);
-            auto* outp = output.data();
-            gs_launch(output_shape.elements())(
-                [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
-        });
+        // TODO: Check result and arg1 shape is the same
+        if(arg1.get_shape().standard() and arg2.get_shape().broadcasted())
+        {
+            auto not_zero       = [](auto x) { return x != 0; };
+            const auto& strides = arg2.get_shape().strides();
+            auto b_it           = std::find_if(strides.begin(), strides.end(), not_zero);
+            auto b_idx          = std::distance(strides.begin(), b_it);
+            auto b_len          = result.get_shape().lens()[b_idx];
+            auto b_stride       = result.get_shape().strides()[b_idx];
+            assert(arg2.get_shape().lens()[b_idx] == b_len);
+            if(b_len <= 2048 and std::none_of(std::next(b_it), strides.end(), not_zero))
+            {
+                const bool divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
+                                            (arg1.get_shape().elements() % 4 == 0);
+                if(divisible_by_4)
+                    binary_broadcast_vec_impl(f, result, arg1, arg2);
+                else
+                    binary_broadcast_impl(f, result, arg1, arg2);
+                return;
+            }
+        }
+        nary_impl(f, result, arg1, arg2);
     };
 }
src/targets/gpu/device/include/migraph/gpu/device/tensor.hpp
@@ -2,6 +2,7 @@
 #define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
 #include <hip/hip_runtime.h>
+#include <migraph/functional.hpp>
 
 namespace migraph {
 namespace gpu {
@@ -53,14 +54,13 @@ template <size_t NDim>
 struct hip_tensor_descriptor
 {
     __device__ __host__ hip_tensor_descriptor() = default;
 
-    template <typename T, typename V>
-    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
+    hip_tensor_descriptor(const shape& s)
     {
-        for(size_t i = 0; i < NDim; i++)
-            lens[i] = lens_ext[i];
-        for(size_t i = 0; i < NDim; i++)
-            strides[i] = strides_ext[i];
+        std::copy(s.lens().begin(), s.lens().end(), lens);
+        std::copy(s.strides().begin(), s.strides().end(), strides);
     }
 
     __device__ __host__ hip_index<NDim> multi(size_t idx) const
     {
         hip_index<NDim> result{};
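The descriptor just captures `lens` and `strides` so device code can convert between linear and multi-dimensional indices (`multi` above, and `linear` as used in nary.hpp). A self-contained stand-in for the 2-D case, not the real class:

    #include <cassert>
    #include <cstddef>

    int main()
    {
        // Row-major 3x4 tensor: lens {3, 4}, strides {4, 1}.
        const std::size_t lens[2]    = {3, 4};
        const std::size_t strides[2] = {4, 1};
        const std::size_t idx        = 7;
        // multi(7) -> {1, 3}
        std::size_t multi[2];
        for(std::size_t i = 0; i < 2; i++)
            multi[i] = (idx / strides[i]) % lens[i];
        // linear({1, 3}) -> 7 again: the round trip nary_nonstandard_impl relies on
        assert(multi[0] * strides[0] + multi[1] * strides[1] == idx);
    }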
src/targets/gpu/eliminate_allocation.cpp
@@ -20,7 +20,7 @@ void eliminate_allocation::apply(program& p) const
             continue;
         allocs.emplace_back(ins, n);
         std::size_t size = ins->get_shape().bytes();
-        n += size + (size % 4);
+        n += size + (size % 32);
     }
     auto mem = p.add_parameter("memory", shape{shape::int8_type, {n}});
     for(auto&& pp : allocs)
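Worth noting what this arithmetic does: `size + (size % 32)` pads the offset by the size's residue, which is not the same as rounding up to the next 32-byte boundary. Illustrative values:

    std::size_t size = 100;
    std::size_t step = size + (size % 32); // 100 + 4 = 104, whereas
                                           // rounding up would give 128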
src/targets/gpu/fuse_ops.cpp
@@ -12,7 +12,7 @@ struct hip_add_relu
     std::string name() const { return "hip::add_relu"; }
     shape compute_shape(const std::vector<shape>& inputs) const
     {
-        check_shapes{inputs}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
        return inputs.front();
     }
     argument compute(context&, const shape&, const std::vector<argument>& args) const
src/targets/gpu/include/migraph/gpu/device/add.hpp (new file, 0 → 100644)
+#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
+#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
+
+#include <migraph/argument.hpp>
+
+namespace migraph {
+namespace gpu {
+namespace device {
+
+void add(const argument& result, const argument& arg1, const argument& arg2);
+
+} // namespace device
+} // namespace gpu
+} // namespace migraph
+#endif
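Only the declaration lives in this header; the matching add.cpp is not part of this page. Given the nary.hpp machinery above and how hip_add calls it from lowering.cpp, a plausible sketch of the definition (an assumption, not shown in this commit; the `__device__` lambda spelling mirrors the launch lambdas in nary.hpp):

    #include <migraph/gpu/device/add.hpp>
    #include <migraph/gpu/device/nary.hpp>

    namespace migraph {
    namespace gpu {
    namespace device {

    void add(const argument& result, const argument& arg1, const argument& arg2)
    {
        // nary(...) picks the standard, vectorized-broadcast, or generic
        // implementation based on the argument shapes.
        nary(result, arg1, arg2)([](auto x, auto y) __device__ { return x + y; });
    }

    } // namespace device
    } // namespace gpu
    } // namespace migraph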
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp
@@ -8,7 +8,7 @@ namespace migraph {
 namespace gpu {
 namespace device {
 
-void add_relu(argument result, argument arg1, argument arg2);
+void add_relu(const argument& result, const argument& arg1, const argument& arg2);
 
 } // namespace device
 } // namespace gpu
src/targets/gpu/lowering.cpp
@@ -9,6 +9,7 @@
 #include <migraph/gpu/hip.hpp>
 #include <migraph/dfor.hpp>
 #include <migraph/gpu/device/contiguous.hpp>
+#include <migraph/gpu/device/add.hpp>
 #include <migraph/iterator_for.hpp>
 #include <migraph/gpu/rocblas.hpp>
 #include <migraph/gpu/context.hpp>
@@ -170,6 +171,23 @@ struct miopen_pooling
     }
 };
 
+struct hip_add
+{
+    std::string name() const { return "gpu::add"; }
+
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        // check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
+        return inputs.at(0);
+    }
+
+    argument compute(context&, const shape&, const std::vector<argument>& args) const
+    {
+        device::add(args[2], args[0], args[1]);
+        return args[2];
+    }
+};
+
 struct miopen_add
 {
     std::string name() const { return "gpu::add"; }
@@ -204,7 +222,7 @@ struct miopen_add
 struct miopen_gemm
 {
     gemm op;
-    std::string name() const { return "gpu::convolution"; }
+    std::string name() const { return "gpu::gemm"; }
     shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(3);
@@ -339,7 +357,7 @@ struct miopen_apply
     instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
     {
-        if(ins == --prog->end())
+        if(ins == --prog->end() and not tag.empty())
         {
             return prog->add_parameter("output", s);
         }
@@ -392,7 +410,7 @@ struct miopen_apply
     {
         auto output = insert_allocation(ins, ins->result);
         return prog->replace_instruction(
-            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
+            ins, hip_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
     }
 
     instruction_ref apply_gemm(instruction_ref ins)
src/targets/gpu/target.cpp
@@ -11,6 +11,7 @@
 #include <migraph/dead_code_elimination.hpp>
 #include <migraph/simplify_reshapes.hpp>
 #include <migraph/eliminate_contiguous.hpp>
+#include <migraph/fwd_conv_batchnorm_rewrite.hpp>
 
 namespace migraph {
 namespace gpu {
@@ -21,6 +22,8 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
     // clang-format off
     return
     {
         dead_code_elimination{},
+        fwd_conv_batchnorm_rewrite{},
+        dead_code_elimination{},
         auto_contiguous{},
         simplify_reshapes{},
test/gpu/miopen.cpp
@@ -8,7 +8,7 @@
 #include <migraph/gpu/hip.hpp>
 #include <migraph/manage_ptr.hpp>
 #include <migraph/type_name.hpp>
-#include <migraph/verify.hpp>
+#include <migraph/verify_args.hpp>
 #include <miopen/miopen.h>
@@ -77,6 +77,12 @@ struct auto_print
 };
 std::array<std::function<void()>, 2> auto_print::handlers = {};
 
+template <class T>
+auto get_hash(const T& x)
+{
+    return std::hash<T>{}(x);
+}
+
 void compile_check(migraph::program& p, const migraph::target& t)
 {
     auto name = t.name();
@@ -100,7 +106,7 @@ migraph::argument run_cpu()
     migraph::program::parameter_map m;
     for(auto&& x : p.get_parameter_shapes())
     {
-        m[x.first] = migraph::generate_argument(x.second);
+        m[x.first] = migraph::generate_argument(x.second, get_hash(x.first));
     }
     return p.eval(m);
 }
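Seeding `generate_argument` with a hash of the parameter name makes each generated input deterministic per parameter, so `run_cpu` here and `run_gpu` below evaluate the program on identical data. The only property relied on (a small hedged illustration; `std::hash` is deterministic within a run):

    // Same parameter name -> same seed -> same generated tensor in both runs.
    assert(get_hash(std::string("x")) == get_hash(std::string("x")));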
@@ -112,52 +118,15 @@ migraph::argument run_gpu()
     auto p = v.create_program();
     auto_print pp{p, 1};
     compile_check(p, migraph::gpu::target{});
     migraph::program::parameter_map m;
     for(auto&& x : p.get_parameter_shapes())
     {
-        m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
+        m[x.first] =
+            migraph::gpu::to_gpu(migraph::generate_argument(x.second, get_hash(x.first)));
     }
     return migraph::gpu::from_gpu(p.eval(m));
 }
 
-void verify_args(const std::string& name,
-                 const migraph::argument& cpu_arg,
-                 const migraph::argument& gpu_arg)
-{
-    visit_all(cpu_arg, gpu_arg)([&](auto cpu, auto gpu) {
-        if(not migraph::verify_range(cpu, gpu))
-        {
-            // TODO: Check for nans
-            std::cout << "FAILED: " << name << std::endl;
-            // std::cout << cpu << std::endl;
-            // std::cout << gpu << std::endl;
-            if(migraph::range_zero(cpu))
-                std::cout << "Cpu data is all zeros" << std::endl;
-            if(migraph::range_zero(gpu))
-                std::cout << "Gpu data is all zeros" << std::endl;
-
-            auto idx = migraph::mismatch_idx(cpu, gpu, migraph::float_equal);
-            if(idx < migraph::range_distance(cpu))
-            {
-                std::cout << "Mismatch at " << idx << ": " << cpu[idx] << " != " << gpu[idx]
-                          << std::endl;
-            }
-
-            auto cpu_nan_idx = find_idx(cpu, migraph::not_finite);
-            if(cpu_nan_idx >= 0)
-                std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": "
-                          << cpu[cpu_nan_idx] << std::endl;
-
-            auto gpu_nan_idx = find_idx(gpu, migraph::not_finite);
-            if(gpu_nan_idx >= 0)
-                std::cout << "Non finite number found in gpu at " << gpu_nan_idx << ": "
-                          << gpu[gpu_nan_idx] << std::endl;
-        }
-    });
-}
-
 template <class V>
 void verify_program()
 {
@@ -210,6 +179,62 @@ struct test_add_broadcast
     }
 };
 
+struct test_add_broadcast2
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 3, 4}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {3}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_add_broadcast3
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 4, 5}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {4}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_add_broadcast4
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 3, 5}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {3}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
+struct test_add_broadcast5
+{
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {3}};
+        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 4, 8}});
+        auto y  = p.add_parameter("y", {migraph::shape::float_type, {4}});
+        auto by = p.add_instruction(migraph::broadcast{1}, x, y);
+        p.add_instruction(migraph::add{}, x, by);
+        return p;
+    }
+};
+
 struct test_conv_relu
 {
     migraph::program create_program() const
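The four test_add_broadcast variants above all broadcast y along axis 1, so the broadcast view has exactly one non-zero stride — the pattern the new binary_broadcast fast path in nary.hpp keys on — while the differing extents toggle the divisible-by-4 choice between the vec4 and scalar kernels. A standalone check of the stride pattern for test_add_broadcast2 (strides hand-computed, not obtained from MIGraphX):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main()
    {
        // broadcast{1} of y lens {3} into x lens {2, 3, 4} yields the view
        // lens {2, 3, 4}, strides {0, 1, 0}: one non-zero stride with none
        // after it, so nary's broadcast check passes. b_len = 3 is not
        // divisible by 4, so this case lands in the scalar (non-vec4) kernel.
        std::size_t strides[3] = {0, 1, 0};
        auto* b_it = std::find_if(strides, strides + 3, [](auto x) { return x != 0; });
        assert(b_it == strides + 1);
        assert(std::none_of(b_it + 1, strides + 3, [](auto x) { return x != 0; }));
    }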
@@ -414,10 +439,49 @@ struct test_conv_bn_relu_pooling
     }
 };
 
+struct test_conv_bn_relu_pooling2
+{
+    static migraph::instruction_ref
+    add_bn(migraph::program& p, migraph::instruction_ref x, std::size_t channels)
+    {
+        migraph::shape vars{migraph::shape::float_type, {channels}};
+        auto scale    = p.add_literal(migraph::abs(migraph::generate_literal(vars, 1 + channels)));
+        auto bias     = p.add_literal(migraph::abs(migraph::generate_literal(vars, 2 + channels)));
+        auto mean     = p.add_literal(migraph::abs(migraph::generate_literal(vars, 3 + channels)));
+        auto variance = p.add_literal(migraph::abs(migraph::generate_literal(vars, 4 + channels)));
+        return p.add_instruction(
+            migraph::batch_norm_inference{}, x, scale, bias, mean, variance);
+    }
+
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape xs1{migraph::shape::float_type, {1, 512, 7, 7}};
+        migraph::shape xs2{migraph::shape::float_type, {1, 1024, 14, 14}};
+        migraph::shape ws1{migraph::shape::float_type, {2048, 512, 1, 1}};
+        migraph::shape ws2{migraph::shape::float_type, {2048, 1024, 1, 1}};
+        auto x1    = p.add_parameter("x1", xs1);
+        auto w1    = p.add_parameter("w1", ws1);
+        auto conv1 = p.add_instruction(migraph::convolution{{0, 0}, {1, 1}, {1, 1}}, x1, w1);
+        auto bn1   = add_bn(p, conv1, 2048);
+        auto x2    = p.add_parameter("x2", xs2);
+        auto w2    = p.add_parameter("w2", ws2);
+        auto conv2 = p.add_instruction(migraph::convolution{{0, 0}, {2, 2}, {1, 1}}, x2, w2);
+        auto bn2   = add_bn(p, conv2, 2048);
+        auto add   = p.add_instruction(migraph::add{}, bn1, bn2);
+        auto relu  = p.add_instruction(migraph::activation{"relu"}, add);
+        p.add_instruction(migraph::pooling{"average", {1, 1}, {2, 2}, {3, 3}}, relu);
+        return p;
+    }
+};
+
 int main()
 {
     verify_program<test_add>();
     verify_program<test_add_broadcast>();
+    verify_program<test_add_broadcast2>();
+    verify_program<test_add_broadcast3>();
+    verify_program<test_add_broadcast4>();
+    verify_program<test_add_broadcast5>();
     verify_program<test_conv_relu>();
     verify_program<test_add_relu>();
     verify_program<test_conv_pooling>();
@@ -431,4 +495,5 @@ int main()
     verify_program<test_batchnorm_inference>();
     verify_program<test_batchnorm_inference_2>();
     verify_program<test_conv_bn_relu_pooling>();
+    verify_program<test_conv_bn_relu_pooling2>();
 }