gaoqiong / MIGraphX · Commits

Commit 11e155c2 — authored Jun 13, 2022 by Paul
Merge commit; parents 8a9c5bce, aa7ff911
Changes: 397 files in total; this page shows 20 changed files with 359 additions and 249 deletions (+359 −249).
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp       +3   −1
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp         +78  −4
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp   +33  −15
src/targets/gpu/lowering.cpp                                     +51  −46
src/targets/gpu/pack_int8_args.cpp                               +9   −3
src/targets/gpu/prefuse_ops.cpp                                  +76  −0
src/targets/gpu/quant_convolution.cpp                            +13  −6
src/targets/gpu/schedule_model.cpp                               +8   −8
src/targets/gpu/sync_device.cpp                                  +4   −4
src/targets/gpu/target.cpp                                       +5   −2
src/targets/gpu/write_literals.cpp                               +7   −7
src/targets/ref/CMakeLists.txt                                   +0   −2
src/targets/ref/gemm.cpp                                         +4   −2
src/targets/ref/lowering.cpp                                     +2   −119
src/tf/CMakeLists.txt                                            +1   −1
src/tf/parse_pooling.cpp                                         +6   −1
src/tf/tf_parser.cpp                                             +1   −2
src/value.cpp                                                    +26  −22
test/CMakeLists.txt                                              +4   −4
test/any_ptr.cpp                                                 +28  −0
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp (mode 100755 → 100644)

@@ -6,13 +6,15 @@
 namespace migraphx {

 using index_int = std::uint32_t;
 using diff_int  = std::int32_t;

 #define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT

 template <class T, index_int N>
 using vec = T __attribute__((ext_vector_type(N)));

-using half = _Float16;
+using half  = _Float16;
+using half2 = migraphx::vec<half, 2>;

 } // namespace migraphx
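Note: `ext_vector_type` is a Clang extension, and arithmetic on such a vector applies lanewise, which is what makes the new `half2` alias useful for packed f16 math. A minimal standalone sketch of that behavior (Clang-only; `half2_add` is a hypothetical helper, not part of this commit):

// Compile with a recent clang; ext_vector_type and _Float16 are Clang extensions.
#include <cstdio>

template <class T, unsigned N>
using vec = T __attribute__((ext_vector_type(N)));

using half  = _Float16;
using half2 = vec<half, 2>;

// Hypothetical helper: the single + adds both lanes at once.
half2 half2_add(half2 a, half2 b) { return a + b; }

int main()
{
    half2 a = {1, 2};
    half2 b = {0.5f, 0.25f};
    half2 c = half2_add(a, b);
    std::printf("%g %g\n", double(c[0]), double(c[1])); // 1.5 2.25
}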
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp

@@ -46,6 +46,9 @@ constexpr auto vec_at(T x, I i)
     }
 }

+template <class T>
+using vec_type = decltype(vec_at(T{}, 0));
+
 template <class... Ts>
 constexpr auto common_vec_size()
 {

@@ -57,24 +60,36 @@ constexpr auto common_vec_size()
     })(vec_size<Ts>()...);
 }

+// Bools can not be used as a vector type so convert it to uint8
+template <class T>
+__device__ __host__ T* remove_bool(T* x)
+{
+    return x;
+}
+
+inline __device__ __host__ uint8_t* remove_bool(bool* x)
+{
+    return reinterpret_cast<uint8_t*>(x);
+}
+
 template <index_int N, class T>
 __device__ __host__ auto as_vec(T* x)
 {
-    if constexpr(N == 0)
+    if constexpr(N < 2)
         return x;
     else
         return reinterpret_cast<vec<T, N>*>(x);
 }

+template <class T, index_int N>
+using safe_vec = vec<conditional_t<is_same<T, bool>{}, uint8_t, T>, N>;
+
 template <class... Ts>
 constexpr auto vec_transform(Ts... xs)
 {
     return [=](auto f) {
         if constexpr(is_any_vec<Ts...>())
         {
             using type          = decltype(f(vec_at(xs, 0)...));
             constexpr auto size = common_vec_size<Ts...>();
-            vec<type, size> result = {0};
+            safe_vec<type, size> result = {0};
             for(int i = 0; i < size; i++)
                 result[i] = f(vec_at(xs, i)...);
             return result;

@@ -86,5 +101,64 @@ constexpr auto vec_transform(Ts... xs)
     };
 }

+// Return a vector type of N from index i in another larger vector
+// N will be 2 for half2 packing
+template <index_int N, class T, class I>
+constexpr vec<vec_type<T>, N> vec_packed_at(T x, I i)
+{
+    if constexpr(vec_size<T>() == 0)
+        return vec<T, N>{x};
+    else
+    {
+        MIGRAPHX_ASSERT((i + N) < vec_size<T>());
+        vec<vec_type<T>, N> result = {0};
+        for(int j = 0; j < N; j++)
+        {
+            result[j] = x[i + j];
+        }
+        return result;
+    }
+}
+
+template <index_int N, class... Ts>
+constexpr auto vec_packed_transform(Ts... xs)
+{
+    return [=](auto f) {
+        if constexpr(is_any_vec<Ts...>())
+        {
+            using type          = vec_type<decltype(f(vec_packed_at<N>(xs, 0)...))>;
+            constexpr auto size = common_vec_size<Ts...>();
+            safe_vec<type, size> result = {0};
+            for(int i = 0; i < size / N; i++)
+            {
+                // Call the function with packed vectors
+                safe_vec<type, N> r = f(vec_packed_at<N>(xs, i * N)...);
+                // Copy the packed vectors to the result
+                for(int j = 0; j < N; j++)
+                    result[i * N + j] = r[j];
+            }
+            return result;
+        }
+        else
+        {
+            return f(xs...);
+        }
+    };
+}
+
+template <class T, class Op>
+constexpr auto vec_reduce(T x, Op op)
+{
+    if constexpr(vec_size<T>() < 2)
+        return x;
+    else
+    {
+        vec_type<T> result = x[0];
+        for(int i = 1; i < vec_size<T>(); i++)
+            result = op(result, x[i]);
+        return result;
+    }
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
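Note: the new `vec_packed_at`/`vec_packed_transform` pair walks a wide vector N lanes at a time (N = 2 for half2 packing), so the callback only ever sees N-lane chunks. A standalone sketch of the same walk, using `std::array` in place of the `ext_vector_type` machinery (`packed_transform` is my name for the idea, not a library function):

#include <array>
#include <cstdio>

template <std::size_t N, class T, std::size_t Size, class F>
std::array<T, Size> packed_transform(const std::array<T, Size>& x, F f)
{
    static_assert(Size % N == 0, "lane count must be divisible by N");
    std::array<T, Size> result{};
    for(std::size_t i = 0; i < Size; i += N)
    {
        std::array<T, N> chunk{}; // vec_packed_at: copy N lanes out
        for(std::size_t j = 0; j < N; ++j)
            chunk[j] = x[i + j];
        std::array<T, N> r = f(chunk); // f only ever sees N lanes
        for(std::size_t j = 0; j < N; ++j) // copy the packed result back
            result[i + j] = r[j];
    }
    return result;
}

int main()
{
    std::array<float, 4> v{1, 2, 3, 4};
    auto doubled = packed_transform<2>(v, [](std::array<float, 2> c) {
        return std::array<float, 2>{2 * c[0], 2 * c[1]};
    });
    for(float x : doubled)
        std::printf("%g ", x); // 2 4 6 8
}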
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp

@@ -50,19 +50,10 @@ constexpr auto shape_step(Shape s, Axis)
     });
 }

-// Bools can not be used as a vector type so convert it to int8
-template <class T>
-__device__ __host__ T* remove_bool(T* x)
-{
-    return x;
-}
-
-inline __device__ __host__ int8_t* remove_bool(bool* x)
-{
-    return reinterpret_cast<int8_t*>(x);
-}
-
 template <index_int N, class T, class Axis>
 __device__ __host__ auto as_vec(T x, Axis axis)
 {
-    if constexpr(N == 0)
+    if constexpr(N < 2)
         return x;
     else
         return make_tensor_view(as_vec<N>(remove_bool(x.data())),

@@ -72,7 +63,7 @@ __device__ __host__ auto as_vec(T x, Axis axis)
 template <index_int N, class T, class Axis>
 constexpr auto tensor_step(T x, Axis axis)
 {
-    if constexpr(N == 0)
+    if constexpr(N < 2)
     {
         return x;
     }

@@ -157,11 +148,11 @@ constexpr auto find_vectorize_size(P pred)
     else if constexpr(decltype(pred(_c<2>)){})
         return _c<2>;
     else
-        return _c<0>;
+        return _c<1>;
 }

 template <class T>
-__host__ __device__ auto vectorize(T x)
+__host__ __device__ auto auto_vectorize(T x)
 {
     if constexpr(tensor_vec_size<T>() == 0)
     {

@@ -194,7 +185,7 @@ inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
 {
     MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1);
     MIGRAPHX_ASSERT(s.lens[axis] > 0);
-    MIGRAPHX_ASSERT(n == 0 or s.lens[axis] % n == 0);
+    MIGRAPHX_ASSERT(n == 1 or s.lens[axis] % n == 0);
     if constexpr(s.strides[axis] == 0)
         return tensor_step<n>(x, axis);
     else

@@ -215,7 +206,34 @@ inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
 inline __device__ __host__ auto auto_vectorize()
 {
-    return [](auto... xs) {
-        return [=](auto f) { auto_vectorize_impl(f, xs...); };
-    };
+    return make_transform([](auto f, auto... xs) { auto_vectorize_impl(f, xs...); });
 }

+template <index_int N, index_int Axis, class T>
+__device__ __host__ auto vectorize_tensor(T x)
+{
+    constexpr auto shape = get_shape_c<T>{};
+    if constexpr(shape.lens[Axis] == 1)
+        return x;
+    else if constexpr(shape.strides[Axis] == 0)
+        return tensor_step<N>(x, _c<Axis>);
+    else
+        return as_vec<N>(x, _c<Axis>);
+}
+
+template <index_int N, index_int Axis>
+__device__ __host__ auto vectorize()
+{
+    return make_transform([](auto f, auto... xs) {
+        if constexpr(N < 2)
+        {
+            f(xs...);
+        }
+        else
+        {
+            f(vectorize_tensor<N, Axis>(xs)...);
+        }
+    });
+}
+
 } // namespace migraphx
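Note: the `return _c<0>` → `return _c<1>` fix matters because the scalar fallback now reports a width of 1 instead of a sentinel 0, which is why the companion `N == 0` checks all became `N < 2`. A sketch of the selection rule, assuming the predicate is divisibility of the axis length (consistent with the `s.lens[axis] % n == 0` assertion above):

#include <cstdio>

// Pick the largest width in {4, 2} dividing the axis length; otherwise
// fall back to scalar (1, not 0, matching the fix in this commit).
constexpr unsigned find_vectorize_size(unsigned axis_len)
{
    if(axis_len % 4 == 0)
        return 4;
    if(axis_len % 2 == 0)
        return 2;
    return 1; // scalar fallback
}

int main()
{
    std::printf("%u %u %u\n",
                find_vectorize_size(8),  // 4
                find_vectorize_size(6),  // 2
                find_vectorize_size(7)); // 1
}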
src/targets/gpu/lowering.cpp

@@ -20,10 +20,10 @@
 #include <migraphx/gpu/abs.hpp>
 #include <migraphx/gpu/batch_norm_inference.hpp>
 #include <migraphx/gpu/compile_roialign.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/deconvolution.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/elu.hpp>
 #include <migraphx/gpu/equal.hpp>
 #include <migraphx/gpu/gemm.hpp>

@@ -40,6 +40,7 @@
 #include <migraphx/gpu/rocblas.hpp>
 #include <migraphx/gpu/unary_not.hpp>
 #include <migraphx/gpu/where.hpp>
+#include <migraphx/gpu/compiler.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/program.hpp>
 #include <utility>

@@ -60,6 +61,7 @@ struct miopen_apply
     std::unordered_map<instruction_ref, std::string> prog_output_names{};
     bool offload_copy   = false;
     bool int8_x4_format = true;
+    bool compute_fp32   = false;

     context& get_context() const
     {

@@ -96,13 +98,22 @@ struct miopen_apply
         }
     }

+    const std::unordered_set<std::string>& get_rocblas_fp32_archs()
+    {
+        static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
+        return supported_archs;
+    }
+
     void init()
     {
         assert(mod != nullptr);
         assert(pass != nullptr);

 #if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
-        auto& ctx = get_context();
+        auto& ctx              = get_context();
+        const auto device_name = trim(split_string(get_device_name(), ':').front());
+        if(contains(get_rocblas_fp32_archs(), device_name))
+            compute_fp32 = true;
         rocblas_gemm_flags flag;
         rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
         int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);

@@ -170,21 +181,14 @@ struct miopen_apply
         add_extend_op("pad");
         add_extend_op("pooling");
         add_extend_op("prefix_scan_sum");
         add_extend_op("reduce_max");
         add_extend_op("reduce_mean");
         add_extend_op("reduce_min");
         add_extend_op("reduce_prod");
         add_extend_op("reduce_sum");
         add_extend_op("reverse");
         add_extend_op("rnn_var_sl_last_output");
         add_extend_op("rnn_var_sl_shift_output");
         add_extend_op("rnn_var_sl_shift_sequence");
-        add_extend_op("scatter");
+        add_extend_op("scatter_none");
         add_extend_op("softmax");
         add_extend_op("topk");

         add_precompile_op("pointwise");
         add_batch_norm_inference_op();
         add_convolution_op();
         add_deconvolution_op();

@@ -195,7 +199,6 @@ struct miopen_apply
         add_neg_op();
         add_nms_op();
         add_quant_convolution_op();
-        add_roialign();
     }

     void copy_params()

@@ -249,11 +252,28 @@ struct miopen_apply
             {
                 check_shape(s, apply_map.at(it->name())(it));
             }
+            else if(has_compiler_for(it->name()))
+            {
+                check_shape(s, insert_precompile_op(it));
+            }
         }
         copy_params();
     }

+    instruction_ref insert_precompile_op(instruction_ref ins)
+    {
+        auto output                       = insert_allocation(ins, ins->get_shape());
+        std::vector<instruction_ref> refs = ins->inputs();
+        refs.push_back(output);
+        return mod->replace_instruction(
+            ins,
+            make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}),
+            refs,
+            ins->module_inputs());
+    }
+
     instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
     {
         // Instruction's output is an input of the ret instruction

@@ -337,7 +357,7 @@ struct miopen_apply
             }
             return mod->replace_instruction(
-                ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format}, refs);
+                ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
         });
     }

@@ -345,8 +365,22 @@ struct miopen_apply
     {
         apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
             auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
-            auto conv = miopen_quant_convolution{op, make_conv(op)};
-            auto ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
+            shape ws;
+            miopen_quant_convolution conv;
+            auto compile_quant_conv_with_format = [&](bool format) {
+                conv = miopen_quant_convolution{op, format, make_conv(op)};
+                ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
+            };
+
+            try
+            {
+                compile_quant_conv_with_format(int8_x4_format);
+            }
+            catch(migraphx::exception&)
+            {
+                // In case no solver supports the default format, retry using the other format.
+                compile_quant_conv_with_format(!int8_x4_format);
+            }

             auto args      = ins->inputs();
             auto workspace = insert_allocation(ins, ws, "workspace");

@@ -356,6 +390,9 @@ struct miopen_apply
         });
     }

+    // add_generic_op just constructs the operator with no fields, whereas add_extend_op copies
+    // over the fields. Since it doesn't have fields, it's default constructed.
+
     void add_generic_op(const std::string& name) { add_generic_op(name, "gpu::" + name); }

     void add_generic_op(const std::string& op_name, const std::string& gpu_name)

@@ -383,21 +420,6 @@ struct miopen_apply
         });
     }

-    void add_precompile_op(const std::string& name)
-    {
-        apply_map.emplace(name, [=](instruction_ref ins) {
-            auto output                       = insert_allocation(ins, ins->get_shape());
-            std::vector<instruction_ref> refs = ins->inputs();
-            refs.push_back(output);
-            return mod->replace_instruction(
-                ins,
-                make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}),
-                refs,
-                ins->module_inputs());
-        });
-    }
-
     void add_batch_norm_inference_op()
     {
         apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {

@@ -432,7 +454,6 @@ struct miopen_apply
                 reshapes[2],
                 reshapes[3],
                 output);
         });
     }

@@ -489,22 +510,6 @@ struct miopen_apply
         });
     }

-    void add_roialign()
-    {
-        apply_map.emplace("roialign", [=](instruction_ref ins) {
-            auto s      = ins->get_shape();
-            auto op_val = ins->get_operator().to_value();
-            auto output = insert_allocation(ins, s);
-            auto args   = ins->inputs();
-            args.push_back(output);
-            auto io_shapes = to_shapes(args);
-            auto co        = compile_roialign(get_context(), io_shapes, op_val);
-            return mod->replace_instruction(ins, co, args);
-        });
-    }
-
     // replace the loop operator with gpu_loop operator
     void add_loop_op()
    {
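Note: the reworked quant_convolution lambda is a compile-with-fallback: try the preferred int8x4 layout first, and retry with the other layout only if no solver accepts it. A standalone sketch of the control flow, with `compile_with` standing in for the MIOpen compile call:

#include <cstdio>
#include <stdexcept>

struct no_solver : std::runtime_error
{
    using std::runtime_error::runtime_error;
};

// Stand-in for conv.compile(...); pretend the int8x4 layout has no solver.
void compile_with(bool int8_x4)
{
    if(int8_x4)
        throw no_solver("no solver supports int8x4");
    std::printf("compiled with int8_x4=%d\n", int8_x4);
}

int main()
{
    bool int8_x4_format = true;
    try
    {
        compile_with(int8_x4_format);
    }
    catch(const no_solver&)
    {
        compile_with(!int8_x4_format); // retry using the other format
    }
}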
src/targets/gpu/pack_int8_args.cpp

@@ -22,10 +22,10 @@ static instruction_ref pad_ins(module& m, instruction_ref ins, int offset)
     auto pad_k    = (k + 3) / 4 * 4;
     auto pad_lens = lens;
     pad_lens[lens.size() + offset] = pad_k;
-    std::vector<int64_t> pad_dims(lens.size() * 2, 0);
-    auto ret_ins = ins;
+    auto ret_ins  = ins;
     if(pad_k != k)
     {
+        std::vector<int64_t> pad_dims(lens.size() * 2, 0);
         pad_dims[lens.size() + offset] = pad_k - k;
         shape ps{s.type(), pad_lens};
         auto ins_out =

@@ -118,7 +118,7 @@ void pack_int8_args::apply(module& m) const
             assert(val.contains("int8_x4_format"));
             if(not val.at("int8_x4_format").to<bool>())
             {
-                return;
+                continue;
             }
             auto inputs = ins->inputs();
             auto lens   = inputs.at(0)->get_shape().lens();

@@ -156,6 +156,12 @@ void pack_int8_args::apply(module& m) const
         }
         else if(ins->name() == "gpu::quant_convolution")
         {
+            auto val = ins->get_operator().to_value();
+            if(not val.at("int8_x4_format").to<bool>())
+            {
+                continue;
+            }
             auto inputs   = ins->inputs();
             auto packed_x = m.insert_instruction(ins,
src/targets/gpu/prefuse_ops.cpp (new file, 0 → 100644)

#include <migraphx/gpu/prefuse_ops.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/make_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

namespace {

struct find_layernorm
{
    auto matcher() const { return match::layernorm(); }

    void apply(module& m, const match::matcher_result& r) const
    {
        auto ins   = r.result;
        auto x_ins = r.instructions["x"];

        if(not x_ins->get_shape().standard())
            x_ins = m.insert_instruction(ins, make_op("contiguous"), x_ins);

        auto relements = x_ins->get_shape().lens().back();
        if(relements > 1024 or (relements % 4 != 0 and relements > 256))
            return;

        auto a = m.insert_instruction(
            ins, make_op("hip::allocate", {{"shape", to_value(x_ins->get_shape())}}));
        m.replace_instruction(ins, make_op("gpu::layernorm"), x_ins, a);
    }
};

struct find_triaddlayernorm
{
    auto matcher() const
    {
        auto add1 = match::name("add")(
            match::none_of(match::is_constant()),
            match::args(match::any().bind("z1"), match::any().bind("z2")));
        auto add2 = match::name("add")(match::either_arg(0, 1)(add1, match::any().bind("z3")));
        return match::layernorm()(match::var("x")(add2));
    }

    void apply(module& m, const match::matcher_result& r) const
    {
        auto ins   = r.result;
        auto x_ins = r.instructions["z1"];
        auto y_ins = r.instructions["z2"];
        auto z_ins = r.instructions["z3"];

        for(auto* pins : {&x_ins, &y_ins, &z_ins})
        {
            if(not(*pins)->get_shape().standard())
                *pins = m.insert_instruction(ins, make_op("contiguous"), *pins);
        }

        auto relements = x_ins->get_shape().lens().back();
        if(relements > 1024 or (relements % 4 != 0 and relements > 256))
            return;

        auto a = m.insert_instruction(
            ins, make_op("hip::allocate", {{"shape", to_value(x_ins->get_shape())}}));
        m.replace_instruction(ins, make_op("gpu::triadd_layernorm"), x_ins, y_ins, z_ins, a);
    }
};

} // namespace

void prefuse_ops::apply(module& m) const
{
    match::find_matches(m, find_triaddlayernorm{}, find_layernorm{});
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
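Note: both matchers share the same size guard before fusing: the fused layernorm kernel is used only when the reduced dimension is at most 1024, and sizes above 256 must additionally be divisible by 4. A small sketch of that predicate pulled out on its own:

#include <cstdio>

constexpr bool layernorm_fusable(unsigned relements)
{
    return not(relements > 1024 or (relements % 4 != 0 and relements > 256));
}

int main()
{
    std::printf("%d %d %d\n",
                layernorm_fusable(768),   // 1: <= 1024 and divisible by 4
                layernorm_fusable(770),   // 0: > 256 and not divisible by 4
                layernorm_fusable(2048)); // 0: > 1024
}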
src/targets/gpu/quant_convolution.cpp

@@ -16,8 +16,8 @@ argument miopen_quant_convolution::compute(context& ctx,
                                            const shape& output_shape,
                                            const std::vector<argument>& args) const
 {
-    auto x_desc = make_tensor(args[0].get_shape(), true);
-    auto w_desc = make_tensor(args[1].get_shape(), true);
+    auto x_desc = make_tensor(args[0].get_shape(), int8_x4_format);
+    auto w_desc = make_tensor(args[1].get_shape(), int8_x4_format);
     auto y_desc = make_tensor(output_shape);

     float alpha = 1;

@@ -49,8 +49,8 @@ shape miopen_quant_convolution::compile(context& ctx,
                                         std::vector<shape> inputs)
 {
     shape workspace_shape{};
-    auto x_desc = make_tensor(inputs[0], true);
-    auto w_desc = make_tensor(inputs[1], true);
+    auto x_desc = make_tensor(inputs[0], int8_x4_format);
+    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

     std::size_t workspace_size = 0;

@@ -62,8 +62,15 @@ shape miopen_quant_convolution::compile(context& ctx,
                                         &workspace_size);
     workspace_shape = shape{shape::int8_type, {workspace_size}};

-    auto arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
-    auto arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
+    auto x_shape = inputs[0];
+    auto w_shape = inputs[1];
+    if(int8_x4_format)
+    {
+        x_shape = pack_int8_shape(x_shape);
+        w_shape = pack_int8_shape(w_shape);
+    }
+    auto arg_vec4_x = to_gpu(generate_argument(x_shape));
+    auto arg_vec4_w = to_gpu(generate_argument(w_shape));
     auto y          = allocate_gpu(output_shape);
     auto workspace  = allocate_gpu(workspace_shape);
src/targets/gpu/schedule_model.cpp

@@ -77,28 +77,28 @@ MIGRAPHX_REGISTER_OP(wait_event)
 MIGRAPHX_REGISTER_OP(set_stream)

 std::size_t schedule_model::concurrency() const { return streams; }

-void schedule_model::sched(module& p, instruction_ref ins, std::size_t n) const
+void schedule_model::sched(module& m, instruction_ref ins, std::size_t n) const
 {
     auto last_stream = std::find_if(std::make_reverse_iterator(ins),
-                                    std::make_reverse_iterator(p.begin()),
+                                    std::make_reverse_iterator(m.begin()),
                                     [&](auto&& i) { return i.name() == "gpu::set_stream"; });
-    if(last_stream != std::make_reverse_iterator(p.begin()))
+    if(last_stream != std::make_reverse_iterator(m.begin()))
     {
         auto&& op = any_cast<set_stream>(last_stream->get_operator());
         // If the same stream was set earlier then skip
         if(op.stream == n)
             return;
     }
-    p.insert_instruction(ins, set_stream{n});
+    m.insert_instruction(ins, set_stream{n});
 }

-void schedule_model::wait(module& p, instruction_ref ins, std::size_t wait_id) const
+void schedule_model::wait(module& m, instruction_ref ins, std::size_t wait_id) const
 {
-    p.insert_instruction(ins, wait_event{wait_id});
+    m.insert_instruction(ins, wait_event{wait_id});
 }

-void schedule_model::record(module& p, instruction_ref ins, std::size_t wait_id) const
+void schedule_model::record(module& m, instruction_ref ins, std::size_t wait_id) const
 {
-    p.insert_instruction(std::next(ins), record_event{wait_id});
+    m.insert_instruction(std::next(ins), record_event{wait_id});
 }

 static std::unordered_map<std::string, std::size_t> create_weight_map()
src/targets/gpu/sync_device.cpp

@@ -8,9 +8,9 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-void sync_device::apply(module& p) const
+void sync_device::apply(module& m) const
 {
-    auto last = std::prev(p.end());
+    auto last = std::prev(m.end());
     if(last->name() == "@return")
     {
         auto inputs = last->inputs();

@@ -18,10 +18,10 @@ void sync_device::apply(module& m) const
                return (i->name() == "hip::copy_from_gpu");
            }))
         {
-            auto sync_in = p.insert_instruction(last, make_op("hip::sync_stream"), inputs);
+            auto sync_in = m.insert_instruction(last, make_op("hip::sync_stream"), inputs);
             if(not inputs.empty())
             {
-                p.replace_instruction(inputs.front(), sync_in);
+                m.replace_instruction(inputs.front(), sync_in);
             }
         }
     }
src/targets/gpu/target.cpp

@@ -31,6 +31,7 @@
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/eliminate_workspace.hpp>
 #include <migraphx/gpu/fuse_ops.hpp>
+#include <migraphx/gpu/prefuse_ops.hpp>
 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/mlir_conv.hpp>
 #include <migraphx/gpu/pack_int8_args.hpp>

@@ -44,7 +45,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS)
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_POINTWISE_FUSION)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)

 struct id_pass
 {

@@ -96,11 +97,13 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         simplify_algebra{},
         simplify_reshapes{},
         simplify_algebra{},
+        prefuse_ops{},
+        dead_code_elimination{},
         auto_contiguous{},
         simplify_reshapes{},
         propagate_constant{},
         dead_code_elimination{},
-        enable_pass(enabled(MIGRAPHX_ENABLE_POINTWISE_FUSION{}), fuse_pointwise{}),
+        enable_pass(not enabled(MIGRAPHX_DISABLE_POINTWISE_FUSION{}), fuse_pointwise{}),
         dead_code_elimination{},
         mlir_conv{&ctx},
         lowering{&ctx, options.offload_copy},
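Note: the env-var rename flips the default: pointwise fusion was opt-in via MIGRAPHX_ENABLE_POINTWISE_FUSION and is now on by default, opt-out via MIGRAPHX_DISABLE_POINTWISE_FUSION. A sketch of the polarity change (`env_enabled` is a simplified stand-in for MIGraphX's `enabled()`, which also parses the variable's value):

#include <cstdio>
#include <cstdlib>

bool env_enabled(const char* name) { return std::getenv(name) != nullptr; }

int main()
{
    // Before: run fuse_pointwise only if the enable flag is set.
    bool before = env_enabled("MIGRAPHX_ENABLE_POINTWISE_FUSION");
    // After: run fuse_pointwise unless the disable flag is set.
    bool after = not env_enabled("MIGRAPHX_DISABLE_POINTWISE_FUSION");
    std::printf("before=%d after=%d\n", before, after);
}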
src/targets/gpu/write_literals.cpp

@@ -11,25 +11,25 @@ namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_COPY_LITERALS)

-void write_literals::apply(module& p) const
+void write_literals::apply(module& m) const
 {
     assert(ctx != nullptr);
     std::size_t n = 0;
-    for(auto ins : iterator_for(p))
+    for(auto ins : iterator_for(m))
     {
         if(ins->name() == "@literal")
         {
             if(enabled(MIGRAPHX_COPY_LITERALS{}))
             {
                 literal l  = ins->get_literal();
-                auto pre   = p.add_literal(l);
-                auto alloc = p.insert_instruction(std::next(pre), hip_allocate{l.get_shape()});
-                p.replace_instruction(ins, hip_copy_to_gpu{}, pre, alloc);
+                auto pre   = m.add_literal(l);
+                auto alloc = m.insert_instruction(std::next(pre), hip_allocate{l.get_shape()});
+                m.replace_instruction(ins, hip_copy_to_gpu{}, pre, alloc);
             }
             else
             {
-                std::string id = p.name() + ":@literal:" + std::to_string(n);
-                p.replace_instruction(ins, hip_copy_literal{ins->get_literal(), id});
+                std::string id = m.name() + ":@literal:" + std::to_string(n);
+                m.replace_instruction(ins, hip_copy_literal{ins->get_literal(), id});
                 n++;
             }
         }
src/targets/ref/CMakeLists.txt

@@ -15,8 +15,6 @@ target_link_libraries(migraphx_ref migraphx Threads::Threads)
 target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE})
 target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
 target_link_libraries(migraphx_all_targets INTERFACE migraphx_ref)
 rocm_install_targets(
   TARGETS migraphx_ref
   INCLUDE
src/targets/ref/gemm.cpp

 #include <migraphx/ref/gemm.hpp>
 #include <migraphx/dfor.hpp>
 #include <migraphx/requires.hpp>
-#include <migraphx/shape_for_each.hpp>
+#include <migraphx/par_for.hpp>
 #include <blaze/math/CustomMatrix.h>

 namespace migraphx {

@@ -74,8 +74,10 @@ void migemm_impl(
     assert(amat.get_shape().lens()[dim_1] == bmat.get_shape().lens()[dim_0]);
     assert(cmat.get_shape().lens()[dim_0] == amat.get_shape().lens()[dim_0]);
     assert(cmat.get_shape().lens()[dim_1] == bmat.get_shape().lens()[dim_1]);

-    shape_for_each(cmat.get_shape(), [&](const auto& c_idx) {
+    auto cs = cmat.get_shape();
+    par_for(cs.elements(), [&](auto i) {
+        auto c_idx = cs.multi(i);
         auto a_idx = c_idx;
         auto b_idx = c_idx;
         double s   = 0.0;
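Note: the shape_for_each → par_for change swaps a sequential multi-index walk for a flat, parallelizable loop over the element count, recovering the multi-index from the flat index inside the body. A standalone sketch of that flattening, with the index math spelled out for a 2-D shape (par_for itself would split the flat range across threads):

#include <array>
#include <cstdio>

int main()
{
    const std::array<unsigned, 2> lens{2, 3}; // a 2x3 "shape"
    const unsigned elements = lens[0] * lens[1];
    for(unsigned i = 0; i < elements; ++i) // par_for would parallelize this range
    {
        const unsigned row = i / lens[1]; // shape.multi(i), spelled out
        const unsigned col = i % lens[1];
        std::printf("(%u,%u) ", row, col);
    }
}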
src/targets/ref/lowering.cpp

@@ -16,7 +16,6 @@
 #include <migraphx/op/loop.hpp>
 #include <migraphx/op/lrn.hpp>
 #include <migraphx/op/pad.hpp>
-#include <migraphx/op/pooling.hpp>
 #include <migraphx/op/softmax.hpp>
 #include <migraphx/op/argmax.hpp>
 #include <migraphx/op/argmin.hpp>

@@ -335,109 +334,6 @@ struct ref_im2col
 };
 MIGRAPHX_REGISTER_OP(ref_im2col)

-struct max_pool
-{
-    static std::string name() { return "max"; }
-
-    template <class T>
-    static T start()
-    {
-        return std::numeric_limits<T>::lowest();
-    }
-
-    static double apply(double x, double y)
-    {
-        double m = std::max(x, y);
-        return (m);
-    }
-
-    static double final(double x, std::size_t) { return (x); }
-};
-
-struct avg_pool
-{
-    static std::string name() { return "average"; }
-
-    template <class T>
-    static double start()
-    {
-        return 0.0;
-    }
-
-    static double apply(double x, double y) { return x + y; }
-
-    static double final(double x, std::size_t y) { return (y == 0) ? 0.0 : (x / y); }
-};
-
-template <class Op>
-struct ref_pooling : auto_register_op<ref_pooling<Op>>
-{
-    ref_pooling() = default;
-    ref_pooling(op::pooling pop) : op(std::move(pop)) {}
-
-    op::pooling op;
-
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return migraphx::reflect(self.op, f);
-    }
-
-    std::string name() const { return "ref::pooling_" + Op::name(); }
-
-    shape compute_shape(const std::vector<shape>& inputs) const
-    {
-        return op.normalize_compute_shape(inputs);
-    }
-
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        argument result{output_shape};
-        visit_all(result, args[0])([&](auto output, auto input) {
-            using type   = typename decltype(output)::value_type;
-            auto in_s    = input.get_shape();
-            auto in_lens = in_s.lens();
-            std::vector<std::size_t> vec_len(in_lens.begin() + 2, in_lens.end());
-
-            par_for(output_shape.elements(), [&](auto i) {
-                auto idx_o = output_shape.multi(i);
-                auto n_dim = idx_o.size();
-                std::vector<std::size_t> win_start;
-                std::vector<std::size_t> win_size;
-                for(std::size_t dim = 2; dim < n_dim; ++dim)
-                {
-                    auto d_2  = dim - 2;
-                    int start = static_cast<int>(idx_o[dim] * op.stride[d_2]) -
-                                static_cast<int>(op.padding[d_2]);
-                    int end = std::min(start + op.lengths[d_2], in_lens[dim]);
-                    start   = std::max(start, 0);
-                    win_start.push_back(start);
-                    win_size.push_back(end - start);
-                }
-
-                shape win_shape{output_shape.type(), win_size};
-                auto pool_size = win_shape.elements();
-                double acc     = Op::template start<type>();
-                shape_for_each(win_shape, [&](auto idx_w) {
-                    auto idx = idx_o;
-                    std::transform(idx_w.begin(),
-                                   idx_w.end(),
-                                   win_start.begin(),
-                                   idx.begin() + 2,
-                                   [](auto ii, auto jj) { return ii + jj; });
-                    if(std::all_of(idx.begin() + 2, idx.end(), [&](auto ii) { return ii >= 0; }) and
-                       idx < in_lens)
-                    {
-                        acc = Op::apply(acc, input[in_s.index(idx)]);
-                    }
-                });
-                output[i] = type(Op::final(acc, pool_size));
-            });
-        });
-        return result;
-    }
-};
-
 struct ref_op
 {
     operation op = op::identity{};

@@ -609,7 +505,7 @@ struct ref_unary : auto_register_op<ref_unary<Op>>
     shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(1);
-        auto s = inputs.at(0);
+        const auto& s = inputs.at(0);
         return {s.type(), s.lens()};
     }

@@ -783,11 +679,7 @@ struct ref_apply
         init();
         for(auto it : iterator_for(*mod))
         {
-            if(it->name() == "pooling")
-            {
-                apply_pooling(it);
-            }
-            else if(apply_map.count(it->name()) > 0)
+            if(apply_map.count(it->name()) > 0)
             {
                 apply_map.at(it->name())(it);
             }

@@ -815,15 +707,6 @@ struct ref_apply
         auto&& op = any_cast<Op>(ins->get_operator());
         mod->replace_instruction(ins, T{op}, ins->inputs());
     }

-    void apply_pooling(instruction_ref ins) const
-    {
-        auto&& op = any_cast<op::pooling>(ins->get_operator());
-        if(op.mode == "max")
-            mod->replace_instruction(ins, ref_pooling<max_pool>{op}, ins->inputs());
-        else if(op.mode == "average")
-            mod->replace_instruction(ins, ref_pooling<avg_pool>{op}, ins->inputs());
-    }
 };

 void lowering::apply(module& m) const { ref_apply{&m}.apply(); }
src/tf/CMakeLists.txt

@@ -19,7 +19,7 @@ target_compile_options(tf-proto PRIVATE -w)
 target_link_libraries(tf-proto PRIVATE ${PROTOBUF_LIBRARY})
 set_target_properties(tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On)

-file(GLOB TF_SRCS *.cpp)
+file(GLOB TF_SRCS ${CONFIGURE_DEPENDS} *.cpp)
 add_library(migraphx_tf ${TF_SRCS})
 target_include_directories(migraphx_tf PRIVATE include)
 set_target_properties(migraphx_tf PROPERTIES EXPORT_NAME tf)
src/tf/parse_pooling.cpp

@@ -19,7 +19,12 @@ struct parse_pooling : op_parser<parse_pooling>
                           tf_parser::node_info info,
                           std::vector<instruction_ref> args) const
     {
-        op::pooling op{starts_with(opd.tf_name, "Max") ? "max" : "average"};
+        if(!starts_with(opd.tf_name, "Max") && !starts_with(opd.tf_name, "Av"))
+        {
+            MIGRAPHX_THROW("tf pooling mode must be Max or Average");
+        }
+        op::pooling op{starts_with(opd.tf_name, "Max") ? op::pooling_mode::max
+                                                       : op::pooling_mode::average};

         if(contains(info.attributes, "strides"))
         {
src/tf/tf_parser.cpp (mode 100755 → 100644)

@@ -499,8 +499,7 @@ literal tf_parser::parse_tensor(const tensorflow::TensorProto& t) const
         return create_literal(shape::int64_type, dims, get_data_vals(t.int64_val(), shape_size));
     case tensorflow::DataType::DT_BOOL:
         return create_literal(shape::int32_type, dims, get_data_vals(t.bool_val(), shape_size));
-    case tensorflow::DataType::DT_HALF:
-    {
+    case tensorflow::DataType::DT_HALF: {
         std::vector<int> data_int32 = get_data_vals(t.half_val(), shape_size);
         std::vector<uint16_t> data_uint16(data_int32.begin(), data_int32.end());
         std::vector<half> data_half;
src/value.cpp (mode 100755 → 100644)

@@ -4,6 +4,7 @@
 #include <migraphx/errors.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/optional.hpp>
 #include <unordered_map>
 #include <utility>

@@ -138,6 +139,7 @@ value::value(const std::string& pkey, const value& rhs)
 {
 }

+value::value(const std::string& pkey, const char* i) : value(pkey, std::string(i)) {}
 value::value(const char* i) : value(std::string(i)) {}

 #define MIGRAPHX_VALUE_GENERATE_DEFINE_METHODS(vt, cpp_type) \

@@ -161,6 +163,12 @@ value::value(const char* i) : value(std::string(i)) {}
     const cpp_type* value::if_##vt() const { return x ? x->if_##vt() : nullptr; }
 MIGRAPHX_VISIT_VALUE_TYPES(MIGRAPHX_VALUE_GENERATE_DEFINE_METHODS)

+value& value::operator=(const char* c)
+{
+    *this = std::string{c};
+    return *this;
+}
+
 value& value::operator=(std::nullptr_t)
 {
     x = nullptr;

@@ -410,25 +418,12 @@ value value::with_key(const std::string& pkey) const
     return result;
 }

-template <class F, class T, class U, class Common = typename std::common_type<T, U>::type>
-auto compare_common_impl(
-    rank<1>, F f, const std::string& keyx, const T& x, const std::string& keyy, const U& y)
-{
-    return f(std::forward_as_tuple(keyx, Common(x)), std::forward_as_tuple(keyy, Common(y)));
-}
-
-template <class F>
-auto compare_common_impl(
-    rank<1>, F f, const std::string& keyx, std::nullptr_t, const std::string& keyy, std::nullptr_t)
-{
-    return f(std::forward_as_tuple(keyx, 0), std::forward_as_tuple(keyy, 0));
-}
-
-template <class F, class T, class U>
-auto compare_common_impl(rank<0>, F, const std::string&, const T&, const std::string&, const U&)
+template <class T>
+const T& compare_decay(const T& x)
 {
-    return false;
+    return x;
 }

+int compare_decay(std::nullptr_t) { return 0; }
+
 template <class F>
 bool compare(const value& x, const value& y, F f)

@@ -436,7 +431,11 @@ bool compare(const value& x, const value& y, F f)
     bool result = false;
     x.visit_value([&](auto&& a) {
         y.visit_value([&](auto&& b) {
-            result = compare_common_impl(rank<1>{}, f, x.get_key(), a, y.get_key(), b);
+            if constexpr(std::is_same<decltype(a), decltype(b)>{})
+                result = f(std::forward_as_tuple(x.get_key(), compare_decay(a)),
+                           std::forward_as_tuple(y.get_key(), compare_decay(b)));
+            else
+                assert(false); // NOLINT
         });
     });
     return result;

@@ -455,11 +454,16 @@ bool operator==(const value& x, const value& y)
         return false;
     return compare(x, y, std::equal_to<>{});
 }

-bool operator!=(const value& x, const value& y) { return !(x == y); }
-bool operator<(const value& x, const value& y) { return compare(x, y, std::less<>{}); }
-bool operator<=(const value& x, const value& y) { return x == y or x < y; }
+bool operator!=(const value& x, const value& y) { return not(x == y); }
+bool operator<(const value& x, const value& y)
+{
+    if(x.get_type() != y.get_type())
+        return x.get_type() < y.get_type();
+    return compare(x, y, std::less<>{});
+}
+bool operator<=(const value& x, const value& y) { return not(x > y); }
 bool operator>(const value& x, const value& y) { return y < x; }
-bool operator>=(const value& x, const value& y) { return x == y or x > y; }
+bool operator>=(const value& x, const value& y) { return not(x < y); }

 void print_value(std::ostream& os, std::nullptr_t) { os << "null"; }
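Note: `compare_decay` replaces the old rank-dispatched `compare_common_impl` overloads by mapping `nullptr` to a comparable stand-in (0), so same-type comparisons can all go through one generic path. A standalone sketch of that idea on its own:

#include <cstddef>
#include <cstdio>

template <class T>
const T& compare_decay(const T& x)
{
    return x;
}

inline int compare_decay(std::nullptr_t) { return 0; }

int main()
{
    // nullptr values decay to 0, so two nulls compare equal.
    std::printf("%d\n", compare_decay(nullptr) == compare_decay(nullptr)); // 1
    // Ordinary values pass through untouched.
    std::printf("%d\n", compare_decay(3) < compare_decay(5)); // 1
}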
test/CMakeLists.txt

@@ -90,7 +90,7 @@ function(add_test_executable TEST_NAME)
     target_include_directories(${TEST_NAME} PUBLIC include)
 endfunction(add_test_executable)

-file(GLOB TESTS *.cpp)
+file(GLOB TESTS ${CONFIGURE_DEPENDS} *.cpp)
 foreach(TEST ${TESTS})
     get_filename_component(BASE_NAME ${TEST} NAME_WE)

@@ -100,7 +100,7 @@ endforeach()
 if(MIGRAPHX_ENABLE_GPU)
     # gpu tests
-    file(GLOB GPU_TESTS gpu/*.cpp)
+    file(GLOB GPU_TESTS ${CONFIGURE_DEPENDS} gpu/*.cpp)
     foreach(TEST ${GPU_TESTS})
         get_filename_component(BASE_NAME ${TEST} NAME_WE)

@@ -120,7 +120,7 @@ file (GLOB ONNX_TESTS ${TEST_ONNX_DIR}/*.cpp)
 foreach(ONNX_TEST ${ONNX_TESTS})
     get_filename_component(BASE_NAME ${ONNX_TEST} NAME_WE)
     set(TEST_NAME test_${BASE_NAME})
-    add_executable(${TEST_NAME} ${TES_ONNX_DIR}/${ONNX_TEST})
+    add_executable(${TEST_NAME} ${ONNX_TEST})
     rocm_clang_tidy_check(${TEST_NAME})
     target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref)
     target_include_directories(${TEST_NAME} PUBLIC include)

@@ -160,7 +160,7 @@ function(test_header NAME HEADER)
 endfunction()

 function(test_headers PREFIX)
-    file(GLOB HEADERS ${ARGN})
+    file(GLOB HEADERS ${CONFIGURE_DEPENDS} ${ARGN})
     foreach(HEADER ${HEADERS})
         file(RELATIVE_PATH HEADER_REL ${CMAKE_SOURCE_DIR} ${HEADER})
test/any_ptr.cpp (new file, 0 → 100644)

#include <migraphx/any_ptr.hpp>
#include <test.hpp>

TEST_CASE(test_int_id)
{
    int i               = 1;
    migraphx::any_ptr p = &i;
    EXPECT(p.get<int*>() == &i);
    EXPECT(p.get(migraphx::get_type_name(i)) == &i);
    EXPECT(p.unsafe_get() == &i);
    EXPECT(test::throws([&] { p.get<float*>(); }));
    EXPECT(test::throws([&] { p.get(migraphx::get_type_name(&i)); }));
}

TEST_CASE(test_int_name)
{
    int i    = 1;
    void* vp = &i;
    migraphx::any_ptr p{vp, migraphx::get_type_name(i)};
    EXPECT(p.get<int*>() == &i);
    EXPECT(p.get(migraphx::get_type_name(i)) == &i);
    EXPECT(p.unsafe_get() == &i);
    EXPECT(test::throws([&] { p.get<float*>(); }));
    EXPECT(test::throws([&] { p.get(migraphx::get_type_name(&i)); }));
    EXPECT(test::throws([&] { p.get(migraphx::get_type_name(float{})); }));
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }