gaoqiong / MIGraphX · Commit baac1dab

Authored May 24, 2023 by Alan Turner

    Merge remote-tracking branch 'origin/develop' into ck-host-lib

Parents: 830dff7a, 77042e30
Changes: 299 files in the full merge; this page shows 20 changed files with 453 additions and 130 deletions (+453 −130).
Files on this page:

  src/targets/gpu/kernels/include/migraphx/kernels/print.hpp        +2    -2
  src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp       +52   -46
  src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp  +11   -3
  src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp          +1    -1
  src/targets/gpu/lowering.cpp                                      +16   -2
  src/targets/gpu/mlir.cpp                                          +137  -26
  src/targets/gpu/rocblas.cpp                                       +2    -17
  src/targets/gpu/target.cpp                                        +15   -8
  src/targets/ref/CMakeLists.txt                                    +1    -2
  src/targets/ref/include/migraphx/ref/target.hpp                   +0    -2
  src/tf/op_parser.cpp                                              +1    -0
  src/tf/tf.cpp                                                     +3    -0
  src/version.h.in                                                  +2    -0
  test/CMakeLists.txt                                               +14   -15
  test/api/CMakeLists.txt                                           +6    -3
  test/api/test_dynamic_shape.cpp (new file)                        +57   -0
  test/api/test_gpu.cpp                                             +100  -1
  test/check_shapes_test.cpp                                        +1    -1
  test/fpga/get_target_assignments.cpp                              +0    -1
  test/fuse_pointwise.cpp                                           +32   -0
src/targets/gpu/kernels/include/migraphx/kernels/print.hpp

println() and println_once() now emit the trailing newline by passing '\n' as an extra argument to print_each, instead of writing through the separate coutln stream:

@@ -244,13 +244,13 @@ __device__ void print_once(Ts... xs)
 template <class... Ts>
 __device__ void println(Ts... xs)
 {
-    print_each(&coutln, xs...);
+    print_each(&cout, xs..., '\n');
 }

 template <class... Ts>
 __device__ void println_once(Ts... xs)
 {
-    print_each_once(&coutln, xs...);
+    print_each_once(&cout, xs..., '\n');
 }
 } // namespace migraphx
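For readers unfamiliar with the print_each idiom, here is a minimal host-side sketch of the new form, assuming print_each simply streams each argument in order (the device versions in this header also coordinate across lanes, which is omitted here):

#include <iostream>

// Stream every argument in order; passing '\n' as the final argument
// reproduces what the removed coutln wrapper used to do.
template <class Stream, class... Ts>
void print_each(Stream* s, Ts... xs)
{
    ((*s << xs), ...); // C++17 fold expression
}

template <class... Ts>
void println(Ts... xs)
{
    print_each(&std::cout, xs..., '\n');
}

int main() { println("answer: ", 42); }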
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp

The MIGRAPHX_DPP_REDUCE macro gains a sign parameter so the int32_t overload can select the signed or unsigned variant of the wave instruction:

@@ -79,20 +79,21 @@ __device__ void dpp_reduce(T& in, Op op)
 #endif
 // NOLINTNEXTLINE
-#define MIGRAPHX_DPP_REDUCE(op, prefix) \
+#define MIGRAPHX_DPP_REDUCE(op, prefix, sign) \
     __device__ inline void dpp_reduce(double& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_f64); } \
     __device__ inline void dpp_reduce(float& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_f32); } \
     __device__ inline void dpp_reduce(half& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_f16); } \
     __device__ inline void dpp_reduce(int32_t& x, op) \
     { \
-        MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_u32); \
+        MIGRAPHX_DPP_REDUCE_ASM(x, prefix##sign##32); \
     } \
     __device__ inline void dpp_reduce(uint32_t& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_u32); }

-MIGRAPHX_DPP_REDUCE(op::sum, v_add)
-MIGRAPHX_DPP_REDUCE(op::max, v_max)
-MIGRAPHX_DPP_REDUCE(op::min, v_min)
-MIGRAPHX_DPP_REDUCE(op::product, v_mul)
+// Note: when max and min are in int32_t, signed version of instruction needs to be used.
+MIGRAPHX_DPP_REDUCE(op::sum, v_add, _u)
+MIGRAPHX_DPP_REDUCE(op::product, v_mul, _u)
+MIGRAPHX_DPP_REDUCE(op::max, v_max, _i)
+MIGRAPHX_DPP_REDUCE(op::min, v_min, _i)

 template <class Op, class T, class Index, class F>
 __device__ auto block_reduce(index idx, Op op, T init, Index n, F f)

The per-algorithm inner_storage helpers that block_large and lane each defined locally are hoisted into a single shared lazy_inner_storage:

@@ -174,6 +175,25 @@ struct inner_storage_tag
 template <class T>
 using is_inner_storage = is_base_of<inner_storage_tag, remove_cv_t<remove_reference_t<T>>>;

+template <class Size, class F>
+struct lazy_inner_storage : inner_storage_tag
+{
+    using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
+    F f;
+    constexpr Size rsize() const { return {}; }
+    template <class U, class V>
+    constexpr auto operator()(U j, V d) const
+    {
+        return f(j, d);
+    }
+};
+
+template <class Size, class F>
+constexpr lazy_inner_storage<Size, F> make_lazy_inner_storage(Size, F f)
+{
+    return {{}, f};
+}
+
 template <class R, class F>
 struct storage_access : F
 {

reducer_base gains a lazy_inner() method built on the new helper:

@@ -278,6 +298,14 @@ struct reducer_base
     });
 }

+template <class F>
+__device__ auto lazy_inner(F f) const
+{
+    return this->inner_sliced([=](auto n, auto&&... xs) {
+        return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
+    });
+}
+
 template <class Op, class T, class Read>
 __device__ auto reduce(Op op, T init, Read read) const
 {

In block_large, the local inner_storage and make_inner_storage definitions are removed:

@@ -396,25 +424,6 @@ struct block_large
     index idx;
     Slicer slice;
-    template <class Size, class F>
-    struct inner_storage : inner_storage_tag
-    {
-        using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
-        F f;
-        constexpr Size rsize() const { return {}; }
-        template <class U, class V>
-        constexpr auto operator()(U j, V d) const
-        {
-            return f(j, d);
-        }
-    };
-    template <class Size, class F>
-    constexpr inner_storage<Size, F> make_inner_storage(Size, F f)
-    {
-        return {f};
-    }
     template <class Op, class T, class Read, class N, class... Ts>
     __device__ auto reduce_impl(Op op, T init, Read read, N n, Ts&&... xs) const
     {

...and its inner_impl switches to the shared factory:

@@ -439,7 +448,7 @@ struct block_large
     template <class R, class F, class N, class... Ts>
     __device__ auto inner_impl(F f, N n, Ts&&... xs) const
     {
-        return make_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
+        return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
     }
 };

The same two changes are applied to the lane struct:

@@ -469,25 +478,6 @@ struct lane
     index idx;
     Slicer slice;
-    template <class Size, class F>
-    struct inner_storage : inner_storage_tag
-    {
-        using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
-        F f;
-        constexpr Size rsize() const { return {}; }
-        template <class U, class V>
-        constexpr auto operator()(U j, V d) const
-        {
-            return f(j, d);
-        }
-    };
-    template <class Size, class F>
-    constexpr inner_storage<Size, F> make_inner_storage(Size, F f)
-    {
-        return {f};
-    }
     template <class Op, class T, class Read, class N, class U, class... Us>
     __device__ auto reduce_impl(Op op, T init, Read read, N n, U&& x, Us&&... xs) const
     {

@@ -518,7 +508,7 @@ struct lane
     template <class R, class F, class N, class... Ts>
     __device__ auto inner_impl(F f, N n, Ts&&... xs) const
     {
-        return make_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
+        return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
     }
 };

 template <class Slicer>

Finally, a fused_reduce entry point is added after simple_reduce:

@@ -577,5 +567,21 @@ simple_reduce(Op op, T init, Input input, Output output, ReadInput read, WriteOu
     });
 }

+template <class Algo, class Reduced, class Output, class F>
+__device__ void fused_reduce(Output output, F f)
+{
+    Algo::template run<Reduced>([&](auto out_idx, auto r) {
+        auto result = f(r, out_idx);
+        if constexpr(reduce::is_inner_storage<decltype(result)>{})
+        {
+            r.inner([&](auto& y, auto x) { y = x; })(output, result);
+        }
+        else
+        {
+            r.outer([&] { output[out_idx] = implicit_conversion(result); });
+        }
+    });
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_REDUCE_HPP
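Why the int32_t overload now needs the sign suffix: the same 32-bit pattern orders differently under signed and unsigned comparison, so op::max and op::min must map to the _i32 instructions. A standalone illustration:

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    int32_t a = -1;
    int32_t b = 1;
    std::cout << std::max(a, b) << '\n'; // 1: signed max, what op::max expects

    // Reinterpreted as unsigned, -1 becomes 0xFFFFFFFF and would win an
    // unsigned max (what v_max_u32 computes).
    auto ua = static_cast<uint32_t>(a);
    auto ub = static_cast<uint32_t>(b);
    std::cout << std::max(ua, ub) << '\n'; // 4294967295, i.e. the wrong answer
}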
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp

int_max() no longer computes the 8-byte maximum with a left shift, which would be undefined behavior, and it now returns the unsigned maximum, so numeric_max() halves it for signed types instead of doubling it:

@@ -218,7 +218,15 @@ using common_type_t = typename common_type<Ts...>::type;
 #define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>

-constexpr unsigned long int_max(unsigned long n) { return (1u << (n * 8)) - 1; }
+constexpr unsigned long int_max(unsigned long n)
+{
+    // Note, left shift cannot be used to get the maximum value of int64_type or
+    // uint64_type because it is undefined behavior to left shift 64 bits for
+    // these types
+    if(n == sizeof(int64_t))
+        return -1;
+    return (1ul << (n * 8)) - 1;
+}

 template <class T,
           MIGRAPHX_REQUIRES(is_integral<T>{} or is_floating_point<T>{} or

@@ -228,9 +236,9 @@ constexpr T numeric_max()
 if constexpr(is_integral<T>{})
 {
     if constexpr(is_unsigned<T>{})
-        return int_max(sizeof(T)) * 2;
+        return int_max(sizeof(T));
     else
-        return int_max(sizeof(T));
+        return int_max(sizeof(T)) / 2;
 }
 else if constexpr(is_same<T, double>{})
     return __DBL_MAX__;
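A standalone check of the new int_max() logic, assuming a 64-bit unsigned long as on typical Linux targets: shifting 1ul left by 64 bits is undefined behavior, so the 8-byte case instead relies on the well-defined conversion of -1 to unsigned:

#include <cstdint>
#include <iostream>

constexpr unsigned long int_max(unsigned long n)
{
    if(n == sizeof(int64_t))
        return -1; // well-defined conversion: all bits set (ULONG_MAX)
    return (1ul << (n * 8)) - 1;
}

int main()
{
    std::cout << int_max(sizeof(int32_t)) << '\n';     // 4294967295
    std::cout << int_max(sizeof(int64_t)) << '\n';     // 18446744073709551615
    // Halving gives the signed maximum, as numeric_max() now computes:
    std::cout << int_max(sizeof(int32_t)) / 2 << '\n'; // 2147483647 == INT32_MAX
}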
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp

An off-by-one in the bounds assert of vec_packed_at is fixed; a pack that ends exactly at the last lane is valid:

@@ -135,7 +135,7 @@ constexpr vec<vec_type<T>, N> vec_packed_at(T x, I i)
         return vec<T, N>{x};
     else
     {
-        MIGRAPHX_ASSERT((i + N) < vec_size<T>());
+        MIGRAPHX_ASSERT((i + N) <= vec_size<T>());
         vec<vec_type<T>, N> result = {0};
         for(int j = 0; j < N; j++)
         {
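The boundary case behind the `<` to `<=` fix: a pack starting at index i touches lanes i through i + N - 1, so a read ending exactly at the last lane (i + N == vec_size) is legal. A plain-array analogue:

#include <cassert>
#include <cstddef>

int main()
{
    constexpr std::size_t vec_size = 4;
    constexpr std::size_t n = 2;
    int lanes[vec_size] = {10, 11, 12, 13};
    // With `<` the final pack at i == 2 would have been rejected even though
    // it only reads lanes 2 and 3, both in range.
    for(std::size_t i = 0; i + n <= vec_size; i += n)
    {
        assert(lanes[i + n - 1] >= 0); // last lane of the pack is in bounds
    }
}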
src/targets/gpu/lowering.cpp

miopen_apply registers a lowering for the new select_module operator, which appends a dynamically allocated output buffer to the instruction's inputs:

@@ -83,8 +83,7 @@ struct miopen_apply
 auto& ctx = get_context();
 int8_x4_format = get_int8_x4_format(ctx);
 compute_fp32 = get_compute_fp32_flag();
 offload_copy = (mod->name() == "main") ? pass->offload_copy : false;
 add_generic_op("contiguous");

@@ -112,6 +111,7 @@ struct miopen_apply
 add_loop_op();
 add_neg_op();
 add_nms_op();
+add_select_module_op();
 }
 void copy_params() const

@@ -359,6 +359,20 @@ struct miopen_apply
         return mod->replace_instruction(ins, gpu_out);
     });
 }

+/**
+ * Adds dynamic allocation for submodule output parameter.
+ */
+void add_select_module_op()
+{
+    apply_map.emplace("select_module", [=](instruction_ref ins) {
+        auto s = ins->get_shape();
+        auto output = insert_allocation(ins, s);
+        std::vector<instruction_ref> inputs = ins->inputs();
+        inputs.push_back(output);
+        return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
+    });
+}
 };

 void lowering::apply(module& m) const { miopen_apply{&m, this}.apply(); }
src/targets/gpu/mlir.cpp

This file picks up the rocMLIR tuning-table integration and MLIR literal support. New includes and environment variables:

@@ -30,6 +30,7 @@
 #include <mlir-c/BuiltinTypes.h>
 #include <mlir-c/Diagnostics.h>
 #include <mlir-c/Dialect/MIGraphX.h>
+#include <mlir-c/Dialect/Rock.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
 #include <mutex>

@@ -55,12 +56,16 @@
 #include <migraphx/permutation.hpp>
 #include <deque>
 #include <variant>
+#include <fstream>
+#include <sstream>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MLIR);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_DB);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_CFG);

 #ifdef MIGRAPHX_MLIR

 template <class T, class F, F f> // NOLINT

A managed handle type for the tuning table:

@@ -124,6 +129,8 @@ using mlir_op_printing_flags = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOpPrintingFlags,
 using mlir_region = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRegion, mlirRegionDestroy);
 using mlir_block = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirBlock, mlirBlockDestroy);
 using mlir_pass_manager = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirPassManager, mlirPassManagerDestroy);
+using mlir_tuning_table =
+    MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRockTuningTable, mlirRockTuningTableDestroy);

 std::string_view to_string_view(MlirStringRef s) { return {s.data, s.length}; }

The hard-coded xdlops arch set becomes a predicate on the target string:

@@ -157,10 +164,10 @@ std::string mlir_print(F f, T x)
     return ss.str();
 }

-const std::unordered_set<std::string>& get_xdlops_archs()
-{
-    static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
-    return supported_archs;
-}
+bool has_xdlops(const std::string& target_arch)
+{
+    const auto device_name = trim(split_string(target_arch, ':').front());
+    return (starts_with(device_name, "gfx9") and device_name >= "gfx908");
+}

 struct mlir_program

Integral tensors now map to signless MLIR integer types; unsigned tensors are rejected:

@@ -190,10 +197,14 @@ struct mlir_program
 result = mlirF64TypeGet(ctx.get());
 else if(as.is_integral())
 {
-    if(as.is_signed())
-        result = mlirIntegerTypeSignedGet(ctx.get(), as.size() * 8);
-    else
-        result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
+    // Note: rocMLIR use signless integer type for tensors types. This
+    // will translate to signed implementation for current supported
+    // operations.
+    if(as.is_unsigned())
+    {
+        MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));
+    }
+    result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
 }
 else
     MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));

Attributes can now hold an MlirAttribute directly:

@@ -313,7 +324,8 @@ struct mlir_program
 using attribute_t = std::variant<std::string,
                                  value,
                                  std::vector<value>,
-                                 MlirType>;
+                                 MlirType,
+                                 MlirAttribute>;
 using named_attribute_t = std::pair<std::string_view, attribute_t>;

 MlirNamedAttribute name_attribute(const named_attribute_t& na) const

The function symbol name is no longer always "main":

@@ -455,7 +467,7 @@ struct mlir_program
 auto ops = create_operation_state("func.func");
 ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
-                    {"sym_name", std::string("main")},
+                    {"sym_name", sym_name},
                     {"kernel", std::string("mixr")},
                     {"arch", target_arch}});
 ops.add_region(std::move(region));

Literals map to tosa.const, and quant_convolution gets the same padding adjustment as convolution:

@@ -470,13 +482,17 @@ struct mlir_program
 {
     if(ins->name() == "@return")
         return "func.return";
+    if(ins->name() == "@literal")
+    {
+        return "tosa.const";
+    }
     return "migraphx." + ins->name();
 }

 static value get_operator_value(const operation& op)
 {
     auto v = op.to_value();
-    if(op.name() == "convolution")
+    if(op.name() == "convolution" or op.name() == "quant_convolution")
     {
         // Adjust symetrical padding
         if(v.at("padding").size() == v.at("stride").size())

parse() now derives the symbol name, short-circuits contiguous, materializes literal values as dense-elements attributes, and uses has_xdlops(); the perf_config lookup moves out of parse() and into the tuning table below:

@@ -498,31 +514,53 @@ struct mlir_program
     return ins->get_shape();
 }

+static std::string get_symbol_name(const module& m)
+{
+    for(auto ins : iterator_for(m))
+    {
+        if(ins->name() == "convolution" or ins->name() == "dot")
+        {
+            return "mlir_" + ins->name();
+        }
+    }
+    return "main";
+}
+
 void parse(const module& m)
 {
+    sym_name = get_symbol_name(m);
     auto mbody = mlirModuleGetBody(mmodule.get());
     std::unordered_map<instruction_ref, MlirValue> ins_map;
     auto fbody = insert(mbody, m, ins_map);
     for(auto ins : iterator_for(m))
     {
         if(ins->name() == "@param")
             continue;
+        if(ins->name() == "contiguous")
+        {
+            ins_map[ins] = ins_map[ins->inputs().at(0)];
+            continue;
+        }
         auto name = get_name(ins);
         auto ops = create_operation_state(name);
         ops.add_attribute_value(get_operator_value(ins->get_operator()));
         if(ins->name() != "@return")
             ops.add_results({get_shape(ins)});
+        if(ins->name() == "@literal")
+        {
+            literal r = ins->get_literal();
+            MlirType tensor_type = make_tensor(ins->get_shape());
+            MlirAttribute mlir_value_attr =
+                mlirDenseElementsAttrRawBufferGet(tensor_type, r.get_shape().bytes(), r.data());
+            ops.add_attributes({{"value", mlir_value_attr}});
+        }
-        if(ins->name() == "convolution")
+        if(ins->name() == "convolution" or ins->name() == "dot")
         {
             pp = problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
             // check if HW supports xdlops
-            auto target_chip = trim(split_string(target_arch, ':').front());
-            bool xdlops = contains(get_xdlops_archs(), target_chip);
-            std::string tuned = get_tune_params(xdlops);
-            if(not tuned.empty())
-                ops.add_attributes({{"perf_config", tuned}});
-            if(xdlops)
+            if(has_xdlops(target_arch))
                 ops.add_attributes({{"xdlopsV2", true}});
         }

compile() splits the single pass manager in two, runs the high-level pipeline, applies tuned parameters, then runs the backend pipeline:

@@ -542,15 +580,19 @@ code_object_op compile() MIGRAPHX_TIDY_CONST
 {
-    mlir_pass_manager pm{mlirPassManagerCreate(ctx.get())};
+    mlir_pass_manager pm_front{mlirPassManagerCreate(ctx.get())};
+    mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
     // 1st pipeline to call
-    mlirMIGraphXAddHighLevelPipeline(pm.get());
+    mlirMIGraphXAddHighLevelPipeline(pm_front.get());
+    mlirPassManagerRun(pm_front.get(), mmodule.get());
     // 2nd pipeline to call
-    mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
-    mlirPassManagerRun(pm.get(), mmodule.get());
+    get_module_tuned();
+    mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
+    mlirPassManagerRun(pm_back.get(), mmodule.get());

     code_object_op op{};
-    op.symbol_name = "main";
+    op.symbol_name = sym_name;
     op.code_object = get_binary();
     std::tie(op.global, op.local) = get_launch_params();
     return op;

The tuning-table machinery itself:

@@ -578,7 +620,74 @@ struct mlir_program
     MIGRAPHX_THROW("Failed to compile mlir program");
 }

-std::string get_tune_params(bool xdlops) { return get_mlir_perf_for_conv(pp, xdlops); }
+std::string get_tune_params(bool xdlops) const { return get_mlir_perf_for_conv(pp, xdlops); }
+
+// This function appends to tuning cfg file that could be
+// used with rocMLIR tuning scripts.
+void dump_tuning_cfg(const char* prob_config) const
+{
+    std::string tuning_cfg_path = string_value_of(MIGRAPHX_MLIR_TUNING_CFG{});
+    if(!tuning_cfg_path.empty())
+    {
+        std::vector<std::string> tokens = split_string(prob_config, '\t');
+        std::string prob = tokens[1];
+        if(starts_with(prob, "conv"))
+        {
+            tuning_cfg_path += ".conv";
+        }
+        else
+        {
+            tuning_cfg_path += ".gemm";
+        }
+        std::ofstream tuning_cfg(tuning_cfg_path, std::ios::app);
+        tuning_cfg << prob << std::endl;
+    }
+}
+
+static mlir_tuning_table create_tuning_table()
+{
+    mlir_tuning_table tuning_table{mlirRockTuningTableCreate()};
+    std::string tuning_db_path = string_value_of(MIGRAPHX_MLIR_TUNING_DB{});
+    if(!tuning_db_path.empty())
+    {
+        std::ifstream tuning_db_tsv(tuning_db_path);
+        if(tuning_db_tsv)
+        {
+            std::string line;
+            while(std::getline(tuning_db_tsv, line))
+            {
+                std::vector<std::string> tokens = split_string(line, '\t');
+                std::string arch = tokens[0];
+                std::string prob = tokens[1];
+                std::string perf = tokens[2];
+                std::string key = arch.append("\t").append(prob);
+                mlirRockTuningUpdateTable(tuning_table.get(), key.c_str(), perf.c_str(), 1.0);
+            }
+        }
+    }
+    else
+    {
+        std::cerr << "WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
+                     "optimal performance."
+                  << std::endl;
+    }
+    return tuning_table;
+}
+
+bool get_module_tuned() const
+{
+    static mlir_tuning_table tuning_table = create_tuning_table();
+    if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
+    {
+        const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
+        std::stringstream key(prob_config);
+        std::cerr << "fails to set param on" << prob_config << std::endl;
+        dump_tuning_cfg(prob_config);
+        return false;
+    }
+    return true;
+}

 mlir_context ctx;
 MlirLocation location;

@@ -586,6 +695,7 @@ struct mlir_program
 problem_params pp;
 std::deque<std::string> strings{};
 std::string target_arch;
+std::string sym_name;
 };

 std::string dump_mlir(const module& m)

In compile_mlir(), the trace print moves inside the mutex:

@@ -645,12 +755,13 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
 {
     adjust_param_shapes(m, inputs);
     const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
-    if(trace)
-        std::cout << m << std::endl;

     // set mutex while llvm thread support is disabled.
     static std::mutex g_mlirc_mutex; // NOLINT
     const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
+    if(trace)
+        std::cout << m << std::endl;
+
     mlir_program mp;
     mp.find_target();
     mp.parse(m);
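How the has_xdlops() string handling behaves on typical architecture strings, sketched standalone with std::string operations in place of MIGraphX's trim and split_string helpers:

#include <iostream>
#include <string>

// Take the chip name before the first ':' feature flag, then compare: any
// gfx9 part that sorts at or after "gfx908" supports xdlops ('a' > '8' in
// ASCII, so "gfx90a" qualifies).
bool has_xdlops_sketch(const std::string& target_arch)
{
    const auto device_name = target_arch.substr(0, target_arch.find(':'));
    return device_name.rfind("gfx9", 0) == 0 and device_name >= "gfx908";
}

int main()
{
    std::cout << has_xdlops_sketch("gfx90a:sramecc+:xnack-") << '\n'; // 1
    std::cout << has_xdlops_sketch("gfx906") << '\n';                 // 0
    std::cout << has_xdlops_sketch("gfx1030") << '\n';                // 0 (not gfx9)
}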
src/targets/gpu/rocblas.cpp

The hard-coded fp32-arch set and the rocBLAS version guards are dropped; get_compute_fp32_flag() now derives the answer from the device name with the same gfx9 comparison used in mlir.cpp:

@@ -47,32 +47,17 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
     return rb;
 }

-const std::unordered_set<std::string>& get_rocblas_fp32_archs()
-{
-    static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
-    return supported_archs;
-}
-
 bool get_compute_fp32_flag()
 {
-    bool compute_fp32 = false;
-#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
     const auto device_name = trim(split_string(get_device_name(), ':').front());
-    if(contains(get_rocblas_fp32_archs(), device_name))
-        compute_fp32 = true;
-#endif
-    return compute_fp32;
+    return (starts_with(device_name, "gfx9") and device_name >= "gfx908");
 }

 bool get_int8_x4_format(context& ctx)
 {
-    bool int8_x4_format = true;
-#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
     rocblas_gemm_flags flag;
     rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
-    int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
-#endif
-    return int8_x4_format;
+    return flag == rocblas_gemm_flags_pack_int8x4;
 }

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/target.cpp

In the include list, eliminate_common_subexpression and simplify_algebra go away; fuse_reduce, promote_literals, and split_single_dyn_dim come in:

@@ -26,13 +26,13 @@
 #include <migraphx/check_context.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/eliminate_allocation.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
 #include <migraphx/eliminate_concat.hpp>
 #include <migraphx/eliminate_contiguous.hpp>
 #include <migraphx/eliminate_data_type.hpp>
 #include <migraphx/eliminate_identity.hpp>
 #include <migraphx/eliminate_pad.hpp>
 #include <migraphx/fuse_pointwise.hpp>
+#include <migraphx/fuse_reduce.hpp>
 #include <migraphx/inline_module.hpp>
 #include <migraphx/insert_pad.hpp>
 #include <migraphx/layout_nhwc.hpp>

@@ -40,7 +40,7 @@
 #include <migraphx/normalize_ops.hpp>
 #include <migraphx/optimize_module.hpp>
 #include <migraphx/preallocate_param.hpp>
-#include <migraphx/propagate_constant.hpp>
+#include <migraphx/promote_literals.hpp>
 #include <migraphx/register_target.hpp>
 #include <migraphx/replace_allocate.hpp>
 #include <migraphx/rewrite_gelu.hpp>

@@ -48,9 +48,9 @@
 #include <migraphx/rewrite_quantization.hpp>
 #include <migraphx/rewrite_rnn.hpp>
 #include <migraphx/schedule.hpp>
-#include <migraphx/simplify_algebra.hpp>
 #include <migraphx/simplify_qdq.hpp>
 #include <migraphx/simplify_reshapes.hpp>
+#include <migraphx/split_single_dyn_dim.hpp>
 #include <migraphx/gpu/allocation_model.hpp>
 #include <migraphx/gpu/compile_miopen.hpp>
 #include <migraphx/gpu/compile_ops.hpp>

The pointwise-fusion kill switch is replaced by one for reduce fusion:

@@ -74,9 +74,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS)
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_REDUCE_FUSION)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC)

 struct id_pass
 {
     std::string name() const { return "id"; }

get_passes() wires up exhaustive tuning, keeps int32 as a supported type, and reshuffles the pass list:

@@ -93,20 +92,24 @@ pass enable_pass(bool enabled, pass p)
 std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options& options) const
 {
     auto& ctx = any_cast<context>(gctx);
+    ctx.set_exhaustive_tune_flag(options.exhaustive_tune);
     std::set<shape::type_t> unsupported_types(shape::types().begin(), shape::types().end());
     unsupported_types.erase(shape::type_t::float_type);
     unsupported_types.erase(shape::type_t::half_type);
     unsupported_types.erase(shape::type_t::bool_type);
     unsupported_types.erase(shape::type_t::int8_type);
     unsupported_types.erase(shape::type_t::uint8_type);
+    unsupported_types.erase(shape::type_t::int32_type);
     unsupported_types.erase(shape::type_t::tuple_type);
     // clang-format off
     return
     {
+        split_single_dyn_dim{},
+        dead_code_elimination{},
         normalize_ops{},
         dead_code_elimination{},
         simplify_qdq{},
-        rewrite_quantization{},
+        enable_pass(not mlir_enabled(), rewrite_quantization{}),
         dead_code_elimination{},
         eliminate_data_type{unsupported_types, shape::type_t::float_type},
         simplify_reshapes{},

@@ -130,9 +133,11 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         fuse_ck_gemm_softmax_gemm{&ctx},
         dead_code_elimination{},
         optimize_module{},
-        enable_pass(not enabled(MIGRAPHX_DISABLE_POINTWISE_FUSION{}), fuse_pointwise{}),
+        fuse_pointwise{},
         dead_code_elimination{},
-        fuse_mlir{&ctx},
+        enable_pass(not enabled(MIGRAPHX_DISABLE_REDUCE_FUSION{}), fuse_reduce{}),
+        dead_code_elimination{},
+        enable_pass(mlir_enabled(), fuse_mlir{&ctx}),
         dead_code_elimination{},
         fuse_ck{&ctx},
         dead_code_elimination{},

@@ -153,6 +158,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         compile_ops{&ctx},
         dead_code_elimination{},
+        promote_literals{},
+        dead_code_elimination{},
         write_literals{&ctx},
         schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})},
         memory_coloring{"hip::allocate"},
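The enable_pass() helper itself is not shown in this diff; from its call sites and the id_pass struct above, it presumably substitutes the do-nothing pass when the switch is off, keeping the pass list unconditional. A hedged sketch of that pattern, using a hypothetical stand-in for MIGraphX's type-erased pass class:

#include <iostream>
#include <string>

// Hypothetical stand-in: the real `pass` is a type-erased wrapper.
struct pass
{
    std::string name;
};

// Presumed shape of enable_pass(): return the given pass when enabled,
// otherwise the no-op id_pass.
pass enable_pass(bool enabled, pass p)
{
    if(enabled)
        return p;
    return pass{"id"};
}

int main()
{
    std::cout << enable_pass(false, pass{"fuse_reduce"}).name << '\n'; // prints "id"
}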
src/targets/ref/CMakeLists.txt

The ref target stops pulling in Threads and links migraphx publicly:

@@ -31,10 +31,9 @@ set_target_properties(migraphx_ref PROPERTIES EXPORT_NAME ref)
 rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION})
 find_path(BLAZE_INCLUDE blaze/Blaze.h)
-find_package(Threads)
 rocm_clang_tidy_check(migraphx_ref)
-target_link_libraries(migraphx_ref migraphx Threads::Threads)
+target_link_libraries(migraphx_ref PUBLIC migraphx)
 target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE})
 target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
src/targets/ref/include/migraphx/ref/target.hpp

The in-header target registration is removed:

@@ -46,8 +46,6 @@ struct target
 argument allocate(const shape& s) const;
 };

-MIGRAPHX_REGISTER_TARGET(target);
-
 } // namespace ref
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/tf/op_parser.cpp

get_op_parsers() now returns the operator names in sorted order:

@@ -46,6 +46,7 @@ std::vector<std::string> get_op_parsers()
                    op_parser_map().end(),
                    std::back_inserter(result),
                    [&](auto&& p) { return p.first; });
+    std::sort(result.begin(), result.end());
     return result;
 }
src/tf/tf.cpp

A get_tf_operators() entry point is added to expose the sorted parser list:

@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include <migraphx/tf/tf_parser.hpp>
+#include <migraphx/tf/op_parser.hpp>
 #include <iostream>
 #include <fstream>
 #include <unordered_map>

@@ -62,5 +63,7 @@ program parse_tf(const std::string& name, const tf_options& options)
     return std::move(parser.prog);
 }

+std::vector<std::string> get_tf_operators() { return tf::get_op_parsers(); }
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/version.h.in

Patch and tweak components are added to the version header template:

@@ -24,4 +24,6 @@
 // clang-format off
 #define MIGRAPHX_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
 #define MIGRAPHX_VERSION_MINOR @PROJECT_VERSION_MINOR@
+#define MIGRAPHX_VERSION_PATCH @PROJECT_VERSION_PATCH@
+#define MIGRAPHX_VERSION_TWEAK @PROJECT_VERSION_TWEAK@
 // clang-format on
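After CMake configures the template, a consumer could assemble a four-part version string from the macros; the numeric values below are placeholders for whatever @PROJECT_VERSION_*@ expands to, not values taken from this commit:

#include <iostream>

// Placeholder values standing in for the CMake-substituted ones.
#define MIGRAPHX_VERSION_MAJOR 2
#define MIGRAPHX_VERSION_MINOR 5
#define MIGRAPHX_VERSION_PATCH 0
#define MIGRAPHX_VERSION_TWEAK 1

int main()
{
    std::cout << MIGRAPHX_VERSION_MAJOR << '.' << MIGRAPHX_VERSION_MINOR << '.'
              << MIGRAPHX_VERSION_PATCH << '.' << MIGRAPHX_VERSION_TWEAK << '\n';
}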
test/CMakeLists.txt

add_test_executable() drops the MIOpen-style flag plumbing, and linking is reordered so migraphx_onnx precedes migraphx_ref:

@@ -106,19 +106,11 @@ function(add_test_executable TEST_NAME)
 if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
     set_target_properties(${TEST_NAME} PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread)
 endif()
-set(TEST_COMMAND ${TEST_NAME})
-separate_arguments(MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND ${MIOPEN_TEST_FLAGS})
-if(MIOPEN_TEST_ALL)
-    set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} --all ${MIOPEN_TEST_FLAGS_ARGS})
-else()
-    set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} ${MIOPEN_TEST_FLAGS_ARGS})
-endif()
 add_test_command(${TEST_NAME} ${TEST_COMMAND})
 add_dependencies(tests ${TEST_NAME})
 add_dependencies(check ${TEST_NAME})
-target_link_libraries(${TEST_NAME} migraphx migraphx_ref migraphx_onnx)
+target_link_libraries(${TEST_NAME} migraphx migraphx_onnx migraphx_ref)
 target_include_directories(${TEST_NAME} PUBLIC include)
 endfunction(add_test_executable)

GPU tests define MIGRAPHX_USE_HIPRTC when enabled:

@@ -142,6 +134,9 @@ if(MIGRAPHX_ENABLE_GPU)
     COST 10
     RESOURCE_LOCK gpu
 )
+if(MIGRAPHX_USE_HIPRTC)
+    target_compile_definitions(test_gpu_${BASE_NAME} PUBLIC -DMIGRAPHX_USE_HIPRTC)
+endif()
 target_link_libraries(test_gpu_${BASE_NAME} migraphx_gpu migraphx_kernels)
 endforeach()
 endif()

@@ -182,7 +177,7 @@ endforeach()
 set(TEST_TF_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tf)
 add_executable(test_tf tf/tf_test.cpp)
 rocm_clang_tidy_check(test_tf)
-target_link_libraries(test_tf migraphx_tf migraphx_ref)
+target_link_libraries(test_tf migraphx_tf)
 target_include_directories(test_tf PUBLIC include)
 add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR})
 add_dependencies(tests test_tf)

Header tests link migraphx_all_targets instead of conditionally linking migraphx_gpu:

@@ -216,10 +211,7 @@ function(test_headers PREFIX)
 string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME)
 get_filename_component(BASE_NAME ${HEADER} NAME_WE)
 test_header(header_${TEST_NAME} ${PREFIX}/${BASE_NAME}.hpp)
-if(MIGRAPHX_ENABLE_GPU)
-    target_link_libraries(header_${TEST_NAME} migraphx_gpu)
-endif()
+target_link_libraries(header_${TEST_NAME} migraphx_all_targets)
 endforeach()
 endfunction()

...and cpu/fpga header tests are added:

@@ -229,3 +221,10 @@
 if(MIGRAPHX_ENABLE_GPU)
     test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
 endif()
+if(MIGRAPHX_ENABLE_CPU)
+    test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
+endif()
+if(MIGRAPHX_ENABLE_FPGA)
+    test_headers(migraphx/fpga ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp)
+endif()
test/api/CMakeLists.txt

API tests now also link migraphx_all_targets, a dynamic-shape test is registered, and the GPU tests locate hip through find_package:

@@ -25,7 +25,7 @@ function(add_api_test TEST_NAME TEST_SRC TEST_DIR)
 set(NAME test_api_${TEST_NAME})
 add_executable(${NAME} EXCLUDE_FROM_ALL ${TEST_SRC})
 rocm_clang_tidy_check(${NAME})
-target_link_libraries(${NAME} migraphx_c migraphx)
+target_link_libraries(${NAME} migraphx_c migraphx migraphx_all_targets)
 target_include_directories(${NAME} PUBLIC ../include)
 add_test(NAME ${NAME} COMMAND $<TARGET_FILE:${NAME}> WORKING_DIRECTORY ${TEST_DIR})
 add_dependencies(tests ${NAME})

@@ -48,6 +48,7 @@ add_api_test(assign test_assign.cpp ${TEST_ONNX_DIR})
 add_api_test(compile_options test_compile_options.cpp ${TEST_ONNX_DIR})
 add_api_test(lookup test_lookup.cpp ${TEST_ONNX_DIR})
 add_api_test(module_construct test_module_construct.cpp ${TEST_ONNX_DIR})
+add_api_test(dynamic_shape test_dynamic_shape.cpp ${TEST_ONNX_DIR})
 add_api_test(ref test_cpu.cpp ${TEST_ONNX_DIR})
 add_api_test(save_load test_save_load.cpp ${TEST_ONNX_DIR})
 add_api_test(op test_op_construct.cpp ${TEST_ONNX_DIR})

@@ -56,8 +57,10 @@ add_api_test(custom_op test_custom_op.cpp ${TEST_ONNX_DIR})
 add_api_test(tf_parser test_tf_parser.cpp ${TEST_TF_DIR})
 # GPU-based tests
 if(MIGRAPHX_ENABLE_GPU)
+    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
+    find_package(hip)
     add_api_test(gpu test_gpu.cpp ${TEST_ONNX_DIR})
-    target_link_libraries(test_api_gpu migraphx_gpu)
+    target_link_libraries(test_api_gpu)
     add_api_test(custom_op_gpu test_custom_op_gpu.cpp ${TEST_ONNX_DIR})
-    target_link_libraries(test_api_custom_op_gpu migraphx_gpu)
+    target_link_libraries(test_api_custom_op_gpu)
 endif()
test/api/test_dynamic_shape.cpp (new file, 0 → 100644)

A new C++-API test exercising dynamic_dimension and dynamic shapes:

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
#include "test.hpp"

TEST_CASE(create_dynamic_dimensions)
{
    migraphx::dynamic_dimension dd0{1, 4};
    EXPECT(not dd0.is_fixed());
    migraphx::dynamic_dimension dd1{4, 4};
    EXPECT(dd1.is_fixed());
    migraphx::optimals opts{1, 2, 4};
    migraphx::dynamic_dimension dd2{1, 4, opts};
    migraphx::dynamic_dimensions dyn_dims0{dd0, dd1, dd2};
    CHECK(bool{dyn_dims0[0] == dd0});
    CHECK(bool{dyn_dims0[1] == dd1});
    CHECK(bool{dyn_dims0[2] == dd2});
    CHECK(bool{dyn_dims0[2] != dd0});
    EXPECT(dyn_dims0.size() == 3);
}

TEST_CASE(create_dynamic_shape)
{
    migraphx::dynamic_dimensions dyn_dims(migraphx::dynamic_dimension{1, 4},
                                          migraphx::dynamic_dimension{78, 92},
                                          migraphx::dynamic_dimension{1, 4, {1, 4}});
    migraphx::shape dyn_shape{migraphx_shape_float_type, dyn_dims};
    CHECK(bool{dyn_shape.dynamic()});
    CHECK(bool{dyn_shape.dyn_dims()[0] == migraphx::dynamic_dimension{1, 4}});
    migraphx::shape static_shape{migraphx_shape_float_type, {3, 8}};
    EXPECT(not static_shape.dynamic());
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/api/test_gpu.cpp

@@ -25,7 +25,6 @@
 #include <hip/hip_runtime_api.h>
 #include <migraphx/migraphx.h>
 #include <migraphx/migraphx.hpp>
 #include <migraphx/manage_ptr.hpp>
 #include "test.hpp"

load_and_run turns on exhaustive tuning:

@@ -35,6 +34,7 @@ TEST_CASE(load_and_run)
 auto shapes_before = p.get_output_shapes();
 migraphx::compile_options options;
 options.set_offload_copy();
+options.set_exhaustive_tune_flag();
 p.compile(migraphx::target("gpu"), options);
 auto shapes_after = p.get_output_shapes();
 CHECK(shapes_before.size() == 1);

A dynamic-batch test is added, together with a commented-out device-copy variant that waits on tuple-argument support in the C++ API:

@@ -71,6 +71,105 @@ hip_ptr get_hip_buffer(size_t size)
     return hip_ptr{ptr};
 }

+// TODO: placeholder until we have a way to copy tuple arguments to/from device through c++ api
+// TEST_CASE(dynamic_batch_load_and_run)
+//{
+//    migraphx::onnx_options o_options;
+//    migraphx::dynamic_dimensions dyn_dims = {{1, 4, {2, 4}}, {3, 3}, {4, 4}, {4, 4}};
+//    o_options.set_dyn_input_parameter_shape("0", dyn_dims);
+//    dyn_dims = {{2, 2}, {3, 3}, {3, 3}, {3, 3}};
+//    o_options.set_dyn_input_parameter_shape("1", dyn_dims);
+//    auto p = migraphx::parse_onnx("conv_dynamic_batch_test.onnx", o_options);
+//    migraphx::compile_options c_options;
+//    c_options.set_split_single_dyn_dim();
+//    p.compile(migraphx::target("gpu"), c_options);
+//    auto out_shapes = p.get_output_shapes();
+//    CHECK(out_shapes.size() == 1);
+//    EXPECT(out_shapes[0].dynamic());
+//
+//    std::vector<float> a(0.12, 2*3*4*4);
+//    std::vector<float> c(0.75, 2*3*3*3);
+//
+//    auto param_shapes = p.get_parameter_shapes();
+//    int batch_size = 2;
+//    std::unordered_map<std::string, migraphx::argument> arg_map;
+//
+//    arg_map["0"] = migraphx::argument(param_shapes["0"].to_static(batch_size), a.data());
+//    arg_map["1"] = migraphx::argument(param_shapes["1"].to_static(batch_size), c.data());
+//
+//    migraphx::program_parameters pp;
+//    std::vector<hip_ptr> buffs;
+//    std::vector<migraphx::argument> args;
+//
+//    // copy to GPU and create parameter map
+//    for(auto&& name : param_shapes.names())
+//    {
+//        if(arg_map.find(name) != arg_map.end())
+//        {
+//            args.push_back(arg_map.at(name));
+//        }
+//        else
+//        {
+//            migraphx::shape static_shape = param_shapes[name].to_static(batch_size);
+//            auto output_arg = migraphx::argument(static_shape);
+//            args.push_back(output_arg);
+//        }
+//        buffs.push_back(get_hip_buffer(args.rbegin()->get_shape().bytes()));
+//        auto err = hipMemcpy(buffs.rbegin()->get(),
+//                             args.rbegin()->data(),
+//                             args.rbegin()->get_shape().bytes(),
+//                             hipMemcpyHostToDevice);
+//        EXPECT(err == hipSuccess);
+//        pp.add(name, migraphx::argument(args.rbegin()->get_shape(), buffs.rbegin()->get()));
+//    }
+//
+//    auto output = p.eval(pp)[0];
+//
+//    // copy output back to host
+//    auto host_arg = migraphx::argument(output.get_shape());
+//    auto err = hipMemcpy(
+//        host_arg.data(), output.data(), output.get_shape().bytes(), hipMemcpyDeviceToHost);
+//    EXPECT(err == hipSuccess);
+//}
+
+TEST_CASE(dynamic_batch_load_and_run_offload)
+{
+    migraphx::onnx_options o_options;
+    migraphx::dynamic_dimensions dyn_dims = {migraphx::dynamic_dimension{1, 4, {2, 4}},
+                                             migraphx::dynamic_dimension{3, 3},
+                                             migraphx::dynamic_dimension{4, 4},
+                                             migraphx::dynamic_dimension{4, 4}};
+    o_options.set_dyn_input_parameter_shape("0", dyn_dims);
+    dyn_dims = {migraphx::dynamic_dimension{2, 2},
+                migraphx::dynamic_dimension{3, 3},
+                migraphx::dynamic_dimension{3, 3},
+                migraphx::dynamic_dimension{3, 3}};
+    o_options.set_dyn_input_parameter_shape("1", dyn_dims);
+    auto p = migraphx::parse_onnx("conv_dynamic_batch_test.onnx", o_options);
+    auto shapes_before = p.get_output_shapes();
+    migraphx::compile_options c_options;
+    c_options.set_offload_copy();
+    p.compile(migraphx::target("gpu"), c_options);
+    auto out_shapes = p.get_output_shapes();
+    CHECK(out_shapes.size() == 1);
+    EXPECT(out_shapes[0].dynamic());
+
+    // batch size = 2
+    std::vector<float> a(2 * 3 * 4 * 4, 0.12);
+    std::vector<float> c(2 * 3 * 3 * 3, 0.75);
+    migraphx::program_parameters pp;
+    auto param_shapes = p.get_parameter_shapes();
+    pp.add("0",
+           migraphx::argument(migraphx::shape(migraphx_shape_float_type, {2, 3, 4, 4}), a.data()));
+    pp.add("1",
+           migraphx::argument(migraphx::shape(migraphx_shape_float_type, {2, 3, 3, 3}), c.data()));
+    auto outputs = p.eval(pp);
+    CHECK(shapes_before.size() == outputs.size());
+    CHECK(bool{outputs.front().get_shape() ==
+               migraphx::shape(migraphx_shape_float_type, {2, 1, 3, 3})});
+}
+
 TEST_CASE(load_and_run_async)
 {
     auto p = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
test/check_shapes_test.cpp

The dynamic shape in the test drops the third (optimals) element from each dynamic_dimension:

@@ -36,7 +36,7 @@ bool create_shapes(bool dynamic_allowed)
 try
 {
     shape a{shape::int64_type, {3}};
-    shape b{shape::float_type, {{3, 6, 0}, {4, 4, 0}}};
+    shape b{shape::float_type, {{3, 6}, {4, 4}}};
     auto op = migraphx::make_op("add");
     migraphx::check_shapes{{a, b}, op, dynamic_allowed}.has(2);
     return true;
test/fpga/get_target_assignments.cpp

The direct fpga target include is dropped in favor of the registered-target lookup:

@@ -26,7 +26,6 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/register_target.hpp>
-#include <migraphx/fpga/target.hpp>
 #include <migraphx/target_assignments.hpp>
 #include <migraphx/iterator_for.hpp>
test/fuse_pointwise.cpp

A regression test checks that a gather fed only by literals does not produce a pointwise module:

@@ -329,4 +329,36 @@ TEST_CASE(all_scalar_input)
     EXPECT(p1 == p2);
 }

+TEST_CASE(no_input)
+{
+    migraphx::program p;
+    {
+        auto* mm = p.get_main_module();
+        migraphx::shape g_shape{migraphx::shape::int64_type, {1}, {0}};
+        migraphx::shape s_indices{migraphx::shape::int32_type, {3}};
+        std::vector<int> indices{3, 800, 800};
+        auto a0 = mm->add_literal(migraphx::literal{s_indices, indices});
+        auto a1 = mm->add_literal(migraphx::literal{g_shape, {1}});
+        int axis = 0;
+        auto out = mm->add_instruction(migraphx::make_op("gather", {{"axis", axis}}), a0, a1);
+        mm->add_return({out});
+    }
+    run_pass(p);
+    // This should NOT create a pointwise module if there are no inputs here.
+    migraphx::program p2;
+    {
+        auto* mm = p2.get_main_module();
+        migraphx::shape g_shape{migraphx::shape::int64_type, {1}, {0}};
+        migraphx::shape s_indices{migraphx::shape::int32_type, {3}};
+        std::vector<int> indices{3, 800, 800};
+        auto a0 = mm->add_literal(migraphx::literal{s_indices, indices});
+        auto a1 = mm->add_literal(migraphx::literal{g_shape, {1}});
+        int axis = 0;
+        auto out = mm->add_instruction(migraphx::make_op("gather", {{"axis", axis}}), a0, a1);
+        mm->add_return({out});
+    }
+    EXPECT(p == p2);
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }