Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
97d4bb6c
Unverified
Commit
97d4bb6c
authored
Jul 25, 2023
by
Ted Themistokleous
Committed by
GitHub
Jul 25, 2023
Browse files
Merge branch 'develop' into add_parity_check_ci
parents
39b097c7
bdbc38bc
Changes
106
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
416 additions
and
181 deletions
+416
-181
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+1
-0
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+0
-2
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+53
-28
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+29
-7
src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
+4
-2
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+1
-1
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+27
-10
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+7
-0
src/targets/ref/lowering.cpp
src/targets/ref/lowering.cpp
+1
-1
src/tf/parse_batchnorm.cpp
src/tf/parse_batchnorm.cpp
+5
-6
src/verify_args.cpp
src/verify_args.cpp
+13
-13
test/api/test_gpu.cpp
test/api/test_gpu.cpp
+0
-1
test/gpu/codegen_literal.cpp
test/gpu/codegen_literal.cpp
+1
-1
test/gpu/manage_host_buffer.cpp
test/gpu/manage_host_buffer.cpp
+1
-1
test/gpu/quantization.cpp
test/gpu/quantization.cpp
+3
-3
test/multi_target/multitarget_test.cpp
test/multi_target/multitarget_test.cpp
+242
-101
test/onnx/.onnxrt-commit
test/onnx/.onnxrt-commit
+1
-1
test/onnx/conv_transpose_auto_pad_test.onnx
test/onnx/conv_transpose_auto_pad_test.onnx
+24
-0
test/onnx/conv_transpose_bias_test.onnx
test/onnx/conv_transpose_bias_test.onnx
+3
-3
test/onnx/conv_transpose_dyn_asym_padding_test.onnx
test/onnx/conv_transpose_dyn_asym_padding_test.onnx
+0
-0
No files found.
src/targets/gpu/fuse_mlir.cpp
View file @
97d4bb6c
...
@@ -216,6 +216,7 @@ struct find_mlir_op
...
@@ -216,6 +216,7 @@ struct find_mlir_op
"quant_dot"
,
"quant_dot"
,
"add"
,
"add"
,
"clip"
,
"clip"
,
"relu"
,
"sub"
,
"sub"
,
"mul"
,
"mul"
,
"div"
,
"div"
,
...
...
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
View file @
97d4bb6c
...
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
...
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
if
(
inputs
.
front
().
standard
())
return
inputs
.
front
();
auto
lens
=
inputs
.
at
(
0
).
lens
();
auto
lens
=
inputs
.
at
(
0
).
lens
();
auto
t
=
inputs
.
at
(
0
).
type
();
auto
t
=
inputs
.
at
(
0
).
type
();
return
{
t
,
lens
};
return
{
t
,
lens
};
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
97d4bb6c
...
@@ -31,7 +31,7 @@
...
@@ -31,7 +31,7 @@
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <unordered_map>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
...
@@ -146,7 +146,8 @@ struct miopen_convolution
...
@@ -146,7 +146,8 @@ struct miopen_convolution
void
set_conv_descriptor
()
void
set_conv_descriptor
()
{
{
cd
=
(
op
.
name
()
==
"deconvolution"
)
?
make_deconv
(
op
)
:
make_conv
(
op
);
cd
=
(
op
.
name
()
==
"convolution_backwards"
)
?
make_convolution_backwards
(
op
)
:
make_conv
(
op
);
}
}
value
compile
(
migraphx
::
context
&
ctx
,
const
shape
&
output
,
const
std
::
vector
<
shape
>&
input
)
value
compile
(
migraphx
::
context
&
ctx
,
const
shape
&
output
,
const
std
::
vector
<
shape
>&
input
)
...
@@ -159,10 +160,31 @@ struct miopen_convolution
...
@@ -159,10 +160,31 @@ struct miopen_convolution
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
{
{
shape
workspace_shape
{};
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]),
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
std
::
size_t
workspace_size
=
0
;
std
::
size_t
workspace_size
=
0
;
auto
status
=
miopenConvolutionForwardGetWorkSpaceSize
(
miopen_stream_handle
,
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : Failed to get forward workspace size"
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
{
{
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
...
@@ -170,13 +192,34 @@ struct miopen_convolution
...
@@ -170,13 +192,34 @@ struct miopen_convolution
set_tensor_descriptor
(
miopenTensorConvolutionX
,
x_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionX
,
x_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionW
,
w_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionW
,
w_desc
,
conv_problem
);
bool
preallocate
=
false
;
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
preallocate
=
true
;
#endif
auto
x
=
preallocate
?
to_gpu
(
generate_argument
(
x_shape
))
:
inputs
[
0
];
auto
w
=
preallocate
?
to_gpu
(
generate_argument
(
w_shape
))
:
inputs
[
1
];
auto
y
=
preallocate
?
allocate_gpu
(
output_shape
)
:
inputs
[
2
];
auto
workspace
=
preallocate
?
allocate_gpu
(
workspace_shape
)
:
migraphx
::
argument
(
workspace_shape
);
set_tensor_descriptor
(
miopenTensorConvolutionY
,
y_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionY
,
y_desc
,
conv_problem
);
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
const
miopenTensorArgument_t
tensor_args
[
3
]
=
{
{
miopenTensorConvolutionX
,
nullptr
,
x
.
implicit
()},
{
miopenTensorConvolutionW
,
nullptr
,
w
.
implicit
()},
{
miopenTensorConvolutionY
,
nullptr
,
y
.
implicit
()},
};
solution_ptr
=
find_solution
(
miopen_stream_handle
,
3
,
tensor_args
,
workspace
.
implicit
(),
workspace_size
,
conv_problem
.
get
(),
ctx
.
get_exhaustive_tune_flag
());
solution_ptr
=
find_solution
(
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
miopen_stream_handle
,
conv_problem
.
get
(),
ctx
.
get_exhaustive_tune_flag
());
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
...
@@ -195,29 +238,10 @@ struct miopen_convolution
...
@@ -195,29 +238,10 @@ struct miopen_convolution
return
shape
{
shape
::
int8_type
,
{
workspace_size
}};
return
shape
{
shape
::
int8_type
,
{
workspace_size
}};
}
}
#else
#else
auto
status
=
miopenConvolutionForwardGetWorkSpaceSize
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : Failed to get forward workspace size"
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
auto
x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
y
=
allocate_gpu
(
output_shape
);
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
miopenConvAlgoPerf_t
perf
;
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
...
@@ -337,6 +361,7 @@ struct miopen_convolution
...
@@ -337,6 +361,7 @@ struct miopen_convolution
return
{
s
.
type
(),
lens
,
strides
};
return
{
s
.
type
(),
lens
,
strides
};
}
}
};
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
97d4bb6c
...
@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
...
@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using
miopen_problem
=
MIGRAPHX_MANAGE_PTR
(
miopenProblem_t
,
miopenDestroyProblem
);
using
miopen_problem
=
MIGRAPHX_MANAGE_PTR
(
miopenProblem_t
,
miopenDestroyProblem
);
using
miopen_solution
=
MIGRAPHX_MANAGE_PTR
(
miopenSolution_t
,
miopenDestroySolution
);
using
miopen_solution
=
MIGRAPHX_MANAGE_PTR
(
miopenSolution_t
,
miopenDestroySolution
);
inline
miopen_solution
inline
miopen_solution
find_solution
(
miopenHandle_t
handle
,
find_solution
(
miopenHandle_t
handle
,
miopenProblem_t
problem
,
bool
tune
=
false
)
size_t
num_inputs
,
const
miopenTensorArgument_t
*
tensor_args
,
void
*
workspace
,
size_t
workspace_size
,
miopenProblem_t
problem
,
bool
tune
=
false
)
{
{
miopenSolution_t
solution
;
miopenSolution_t
solution
;
size_t
found
=
0
;
size_t
found
=
0
;
miopen_find_options
fo
=
nullptr
;
miopen_find_options
fo
=
make_obj
<
miopen_find_options
>
(
&
miopenCreateFindOptions
)
;
if
(
tune
)
if
(
tune
)
{
{
fo
=
make_obj
<
miopen_find_options
>
(
&
miopenCreateFindOptions
);
miopenSetFindOptionTuning
(
fo
.
get
(),
1
);
miopenSetFindOptionTuning
(
fo
.
get
(),
1
);
}
}
auto
status
=
miopenFindSolutions
(
handle
,
problem
,
fo
.
get
(),
&
solution
,
&
found
,
1
);
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
for
(
auto
i
:
range
(
num_inputs
))
{
auto
status
=
miopenSetFindOptionPreallocatedTensor
(
fo
.
get
(),
tensor_args
[
i
].
id
,
tensor_args
[
i
].
buffer
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen: failed to preallocate tensors for the find process"
);
}
auto
status
=
miopenSetFindOptionPreallocatedWorkspace
(
fo
.
get
(),
workspace
,
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen: failed to preallocate workspace for the find process"
);
#else
miopenStatus_t
status
;
(
void
)(
num_inputs
);
(
void
)(
tensor_args
);
(
void
)(
workspace_size
);
(
void
)(
workspace
);
#endif
status
=
miopenFindSolutions
(
handle
,
problem
,
fo
.
get
(),
&
solution
,
&
found
,
1
);
auto
result
=
miopen_solution
{
solution
};
auto
result
=
miopen_solution
{
solution
};
if
(
status
!=
miopenStatusSuccess
or
found
==
0
)
if
(
status
!=
miopenStatusSuccess
or
found
==
0
)
MIGRAPHX_THROW
(
"MIOpen miopenFindSolutions failed"
);
MIGRAPHX_THROW
(
"MIOpen
:
miopenFindSolutions failed"
);
return
result
;
return
result
;
}
}
...
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
...
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
}
}
template
<
class
T
>
template
<
class
T
>
inline
convolution_descriptor
make_
de
conv
(
const
T
&
op
)
inline
convolution_descriptor
make_conv
olution_backwards
(
const
T
&
op
)
{
{
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
miopenConvolutionMode_t
c_mode
=
miopenTranspose
;
miopenConvolutionMode_t
c_mode
=
miopenTranspose
;
...
...
src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
View file @
97d4bb6c
...
@@ -122,12 +122,14 @@ struct source_location_capture
...
@@ -122,12 +122,14 @@ struct source_location_capture
{
{
T
x
;
T
x
;
source_location
loc
;
source_location
loc
;
template
<
class
U
,
class
=
decltype
(
T
(
U
{}
))>
// declval is a workaround since default constructor for "U" is not working with rocm-5.6
template
<
class
U
>
static
U
&&
declval
();
template
<
class
U
,
class
=
decltype
(
T
(
declval
<
U
>()))
>
constexpr
source_location_capture
(
U
px
,
source_location
ploc
=
source_location
{})
constexpr
source_location_capture
(
U
px
,
source_location
ploc
=
source_location
{})
:
x
(
px
),
loc
(
ploc
)
:
x
(
px
),
loc
(
ploc
)
{
{
}
}
constexpr
operator
source_location
()
const
{
return
loc
;
}
constexpr
operator
source_location
()
const
{
return
loc
;
}
constexpr
operator
T
()
const
{
return
x
;
}
constexpr
operator
T
()
const
{
return
x
;
}
...
...
src/targets/gpu/lowering.cpp
View file @
97d4bb6c
...
@@ -106,7 +106,7 @@ struct miopen_apply
...
@@ -106,7 +106,7 @@ struct miopen_apply
add_extend_op
(
"topk"
);
add_extend_op
(
"topk"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"
de
convolution"
);
add_convolution_op
(
"convolution
_backwards
"
);
add_convolution_op
(
"quant_convolution"
);
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
...
...
src/targets/gpu/mlir.cpp
View file @
97d4bb6c
...
@@ -389,14 +389,20 @@ struct mlir_program
...
@@ -389,14 +389,20 @@ struct mlir_program
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
{
{
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
return
*
this
;
}
}
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
{
{
auto
attributes
=
prog
->
name_attributes
(
v
);
auto
attributes
=
prog
->
name_attributes
(
v
);
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
return
*
this
;
}
}
...
@@ -419,13 +425,19 @@ struct mlir_program
...
@@ -419,13 +425,19 @@ struct mlir_program
return
shape
{
r
.
type
(),
r
.
lens
()};
return
shape
{
r
.
type
(),
r
.
lens
()};
});
});
auto
x
=
prog
->
make_tensors
(
reshaped
);
auto
x
=
prog
->
make_tensors
(
reshaped
);
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
if
(
not
x
.
empty
())
{
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
}
return
*
this
;
return
*
this
;
}
}
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
{
{
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
if
(
not
inputs
.
empty
())
{
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
}
return
*
this
;
return
*
this
;
}
}
...
@@ -435,7 +447,10 @@ struct mlir_program
...
@@ -435,7 +447,10 @@ struct mlir_program
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
return
r
.
get
();
return
r
.
get
();
});
});
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
if
(
not
mregions
.
empty
())
{
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
}
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
// Release memory since mlir_operation owns it
// Release memory since mlir_operation owns it
for
(
auto
&
r
:
regions
)
for
(
auto
&
r
:
regions
)
...
@@ -607,12 +622,12 @@ struct mlir_program
...
@@ -607,12 +622,12 @@ struct mlir_program
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
// 1st pipeline to call
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirPassManagerRun
(
pm_front
.
get
(),
mmodule
.
get
());
mlirPassManagerRun
OnOp
(
pm_front
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
())
)
;
// 2nd pipeline to call
// 2nd pipeline to call
get_module_tuned
();
get_module_tuned
();
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirPassManagerRun
(
pm_back
.
get
(),
mmodule
.
get
());
mlirPassManagerRun
OnOp
(
pm_back
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
())
)
;
code_object_op
op
{};
code_object_op
op
{};
op
.
symbol_name
=
sym_name
;
op
.
symbol_name
=
sym_name
;
...
@@ -701,6 +716,11 @@ struct mlir_program
...
@@ -701,6 +716,11 @@ struct mlir_program
bool
get_module_tuned
()
const
bool
get_module_tuned
()
const
{
{
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
// The tuning table as currently implemented is currently not
// thread safe. This will be fixed in the future. For now,
// stick a mutex around all tuning table interaction.
static
std
::
mutex
lock
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
lock
);
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
{
{
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
...
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
...
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
{
{
adjust_param_shapes
(
m
,
inputs
);
adjust_param_shapes
(
m
,
inputs
);
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
// set mutex while llvm thread support is disabled.
static
std
::
mutex
g_mlirc_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_mlirc_mutex
);
if
(
trace
)
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
std
::
cout
<<
m
<<
std
::
endl
;
...
...
src/targets/gpu/rocblas.cpp
View file @
97d4bb6c
...
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
...
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
bool
get_int8_x4_format
(
context
&
ctx
)
bool
get_int8_x4_format
(
context
&
ctx
)
{
{
#if ROCBLAS_VERSION_MAJOR >= 3
(
void
)(
ctx
);
return
false
;
#else
// int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
// v3.0 and will be removed in v4.0
rocblas_gemm_flags
flag
;
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
#endif
}
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/ref/lowering.cpp
View file @
97d4bb6c
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/quant_dot.hpp>
...
...
src/tf/parse_batchnorm.cpp
View file @
97d4bb6c
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
// unsqueeze tensors of shape (C) to broadcast correctly
// unsqueeze tensors of shape (C) to broadcast correctly
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
scale_unsqueeze
=
auto
scale_unsqueeze
=
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
var_unsqueeze
=
auto
var_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
auto
numer
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
x_sub_mean
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
denom
=
info
.
add_
broadcastable_binary_op
(
"pow"
,
var_eps
,
rt
);
auto
rsqrt
=
info
.
add_
instruction
(
make_op
(
"rsqrt"
)
,
var_eps
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"
div"
,
numer
,
denom
);
auto
mul0
=
info
.
add_broadcastable_binary_op
(
"
mul"
,
scale_unsqueeze
,
rsqrt
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
scale_unsqueeze
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
x_sub_mean
,
mul0
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
}
}
};
};
...
...
src/verify_args.cpp
View file @
97d4bb6c
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
bool
passed
=
true
;
bool
passed
=
true
;
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
double
error
;
double
error
;
passed
=
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
passed
=
verify
::
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
if
(
not
passed
)
if
(
not
passed
)
{
{
// TODO: Check for nans
// TODO: Check for nans
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
if
(
target
.
size
()
<
32
)
if
(
target
.
size
()
<
32
)
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
auto
mxdiff
=
max_diff
(
ref
,
target
);
auto
mxdiff
=
verify
::
max_diff
(
ref
,
target
);
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
auto
idx
=
mismatch_idx
(
ref
,
target
,
float_equal
);
auto
idx
=
verify
::
mismatch_idx
(
ref
,
target
,
float_equal
);
if
(
idx
<
range_distance
(
ref
))
if
(
idx
<
verify
::
range_distance
(
ref
))
{
{
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
<<
std
::
endl
;
<<
std
::
endl
;
}
}
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
}
}
else
else
{
{
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
// auto mxdiff = max_diff(ref, target);
// auto mxdiff = max_diff(ref, target);
// std::cout << "Max diff: " << mxdiff << std::endl;
// std::cout << "Max diff: " << mxdiff << std::endl;
// auto idx = mismatch_idx(ref, target, float_equal);
// auto idx = mismatch_idx(ref, target, float_equal);
// if(idx < range_distance(ref))
// if(idx <
verify::
range_distance(ref))
// {
// {
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// << std::endl;
// << std::endl;
// }
// }
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
...
test/api/test_gpu.cpp
View file @
97d4bb6c
...
@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
...
@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
auto
shapes_before
=
p
.
get_output_shapes
();
auto
shapes_before
=
p
.
get_output_shapes
();
migraphx
::
compile_options
options
;
migraphx
::
compile_options
options
;
options
.
set_offload_copy
();
options
.
set_offload_copy
();
options
.
set_exhaustive_tune_flag
();
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
auto
shapes_after
=
p
.
get_output_shapes
();
auto
shapes_after
=
p
.
get_output_shapes
();
CHECK
(
shapes_before
.
size
()
==
1
);
CHECK
(
shapes_before
.
size
()
==
1
);
...
...
test/gpu/codegen_literal.cpp
View file @
97d4bb6c
...
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
...
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
migraphx
::
target
gpu_t
=
migraphx
::
make_target
(
"gpu"
);
migraphx
::
target
gpu_t
=
migraphx
::
make_target
(
"gpu"
);
run_prog
(
p
,
gpu_t
,
m
,
gpu_result
);
run_prog
(
p
,
gpu_t
,
m
,
gpu_result
);
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
ref_result
,
gpu_result
));
}
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/gpu/manage_host_buffer.cpp
View file @
97d4bb6c
...
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
...
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
auto
result
=
p
.
eval
(
pp
).
back
();
auto
result
=
p
.
eval
(
pp
).
back
();
std
::
vector
<
float
>
results_vector
(
ss
.
elements
(),
-
1
);
std
::
vector
<
float
>
results_vector
(
ss
.
elements
(),
-
1
);
result
.
visit
([
&
](
auto
output
)
{
results_vector
.
assign
(
output
.
begin
(),
output
.
end
());
});
result
.
visit
([
&
](
auto
output
)
{
results_vector
.
assign
(
output
.
begin
(),
output
.
end
());
});
EXPECT
(
migraphx
::
verify_range
(
c_vec
,
results_vector
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
c_vec
,
results_vector
));
}
}
TEST_CASE
(
arguments_lifetime
)
TEST_CASE
(
arguments_lifetime
)
...
...
test/gpu/quantization.cpp
View file @
97d4bb6c
...
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
...
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
std
::
vector
<
int8_t
>
val_final
;
std
::
vector
<
int8_t
>
val_final
;
ref_arg_final
.
visit
([
&
](
auto
v
)
{
val_final
.
assign
(
v
.
begin
(),
v
.
end
());
});
ref_arg_final
.
visit
([
&
](
auto
v
)
{
val_final
.
assign
(
v
.
begin
(),
v
.
end
());
});
EXPECT
(
migraphx
::
verify_range
(
val_orig
,
val_final
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
val_orig
,
val_final
));
}
}
TEST_CASE
(
int8_quantization
)
TEST_CASE
(
int8_quantization
)
...
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
...
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
// the regular pipeline uses the rewrite_quantization in the much
// the regular pipeline uses the rewrite_quantization in the much
// earlier stage.
// earlier stage.
if
(
migraphx
::
gpu
::
mlir_enabled
())
if
(
migraphx
::
gpu
::
mlir_enabled
())
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
,
1e5
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
ref_result
,
gpu_result
,
1e5
));
else
else
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
ref_result
,
gpu_result
));
}
}
}
}
...
...
test/multi_target/multitarget_test.cpp
View file @
97d4bb6c
...
@@ -34,13 +34,13 @@
...
@@ -34,13 +34,13 @@
#include <migraphx/literal.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/functional.hpp>
#include <basic_ops.hpp>
#include <basic_ops.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/generate.hpp>
#include "test.hpp"
#include "test.hpp"
// check if it is custom_op or run_on_module operator
// check if it is custom_op or run_on_module operator
...
@@ -180,38 +180,74 @@ TEST_CASE(multitarget_compile_cpu_gpu)
...
@@ -180,38 +180,74 @@ TEST_CASE(multitarget_compile_cpu_gpu)
auto
z_param
=
mm
->
add_parameter
(
"z"
,
s
);
auto
z_param
=
mm
->
add_parameter
(
"z"
,
s
);
auto
cpu_ins
=
mm
->
add_instruction
(
auto
cpu_ins
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
1
}}),
{
x_param
,
y_param
},
{
cpu_mod
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
1
}}),
{
x_param
,
y_param
},
{
cpu_mod
});
auto
cpu_ins_0
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
cpu_ins
);
auto
gpu_ins
=
mm
->
add_instruction
(
auto
gpu_ins
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{
cpu_ins
,
z_param
},
{
gpu_mod
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{
cpu_ins_0
,
z_param
},
{
gpu_mod
});
mm
->
add_return
({
gpu_ins
});
auto
gpu_ins_0
=
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)});
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
gpu_ins
);
mm
->
add_return
({
gpu_ins_0
});
migraphx
::
compile_options
gpu_opts
;
gpu_opts
.
offload_copy
=
true
;
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)},
{
gpu_opts
});
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)}));
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)}));
migraphx
::
parameter_map
params
;
params
[
"x"
]
=
migraphx
::
fill_argument
(
s
,
1
);
params
[
"y"
]
=
migraphx
::
fill_argument
(
s
,
2
);
params
[
"z"
]
=
migraphx
::
fill_argument
(
s
,
3
);
auto
result
=
p
.
eval
(
params
).
back
();
auto
gold
=
migraphx
::
fill_argument
(
s
,
6
);
EXPECT
(
gold
==
result
);
}
}
TEST_CASE
(
single_target_compile
)
TEST_CASE
(
single_target_
multi_
compile
)
{
{
migraphx
::
program
p
;
migraphx
::
program
p
;
auto
*
mm
=
p
.
get_main_module
();
migraphx
::
shape
boxes_s
{
migraphx
::
shape
::
float_type
,
{
1
,
6
,
4
}};
migraphx
::
shape
boxes_s
{
migraphx
::
shape
::
float_type
,
{
1
,
6
,
4
}};
auto
*
mm
=
p
.
get_main_module
();
auto
boxes_param
=
mm
->
add_parameter
(
"boxes"
,
boxes_s
);
auto
*
gpu_mod
=
p
.
create_module
(
"gpu_mod"
);
auto
boxes_param_gpu
=
gpu_mod
->
add_parameter
(
"boxes_param_gpu"
,
boxes_s
);
migraphx
::
shape
scores_s
{
migraphx
::
shape
::
float_type
,
{
1
,
1
,
6
}};
migraphx
::
shape
scores_s
{
migraphx
::
shape
::
float_type
,
{
1
,
1
,
6
}};
std
::
vector
<
float
>
scores_vec
=
{
0.9
,
0.75
,
0.6
,
0.95
,
0.5
,
0.3
};
std
::
vector
<
float
>
scores_vec
=
{
0.9
,
0.75
,
0.6
,
0.95
,
0.5
,
0.3
};
auto
scores_l
=
gpu_mod
->
add_literal
(
migraphx
::
literal
(
scores_s
,
scores_vec
));
auto
boxes_l
=
mm
->
add_parameter
(
"boxes"
,
boxes_s
);
auto
max_out_l
=
gpu_mod
->
add_literal
(
int64_t
{
4
});
auto
scores_l
=
mm
->
add_literal
(
migraphx
::
literal
(
scores_s
,
scores_vec
));
auto
iou_threshold
=
gpu_mod
->
add_literal
(
0.5
f
);
auto
max_out_l
=
mm
->
add_literal
(
int64_t
{
4
});
auto
score_threshold
=
gpu_mod
->
add_literal
(
0.0
f
);
auto
iou_threshold
=
mm
->
add_literal
(
0.5
f
);
auto
r
=
gpu_mod
->
add_instruction
(
auto
score_threshold
=
mm
->
add_literal
(
0.0
f
);
migraphx
::
make_op
(
"nonmaxsuppression"
,
{{
"center_point_box"
,
true
},
{
"use_dyn_output"
,
true
}}),
auto
r
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"nonmaxsuppression"
,
{{
"center_point_box"
,
1
}}),
boxes_param_gpu
,
boxes_l
,
scores_l
,
scores_l
,
max_out_l
,
max_out_l
,
iou_threshold
,
iou_threshold
,
score_threshold
);
score_threshold
);
gpu_mod
->
add_return
({
r
});
mm
->
add_return
({
r
});
p
.
compile
(
migraphx
::
make_target
(
"gpu"
));
auto
run_on_gpu
=
mm
->
add_instruction
(
EXPECT
(
is_compiled_gpu_module
(
*
p
.
get_main_module
()));
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{
boxes_param
},
{
gpu_mod
});
auto
run_on_gpu_0
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_on_gpu
);
mm
->
add_return
({
run_on_gpu_0
});
// compile using multi-target compilation path
migraphx
::
compile_options
gpu_opts
;
gpu_opts
.
offload_copy
=
true
;
// need to add "ref" to avoid ambigious call to "compile()"
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"ref"
)},
{
gpu_opts
});
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"ref"
)}));
// eval
migraphx
::
parameter_map
params
;
std
::
vector
<
float
>
boxes_vec
=
{
0.5
,
0.5
,
1.0
,
1.0
,
0.5
,
0.6
,
1.0
,
1.0
,
0.5
,
0.4
,
1.0
,
1.0
,
0.5
,
10.5
,
1.0
,
1.0
,
0.5
,
10.6
,
1.0
,
1.0
,
0.5
,
100.5
,
1.0
,
1.0
};
params
[
"boxes"
]
=
migraphx
::
argument
(
boxes_s
,
boxes_vec
.
data
());
auto
output
=
p
.
eval
(
params
).
back
();
std
::
vector
<
int64_t
>
gold_vec
=
{
0
,
0
,
3
,
0
,
0
,
0
,
0
,
0
,
5
};
auto
gold
=
migraphx
::
argument
(
migraphx
::
shape
{
migraphx
::
shape
::
int64_type
,
{
3
,
3
}},
gold_vec
.
data
());
EXPECT
(
output
==
gold
);
}
}
TEST_CASE
(
multitarget_compile_if_then_else
)
TEST_CASE
(
multitarget_compile_if_then_else
)
...
@@ -224,54 +260,65 @@ TEST_CASE(multitarget_compile_if_then_else)
...
@@ -224,54 +260,65 @@ TEST_CASE(multitarget_compile_if_then_else)
auto
x
=
mm
->
add_parameter
(
"x"
,
ds
);
auto
x
=
mm
->
add_parameter
(
"x"
,
ds
);
auto
y
=
mm
->
add_parameter
(
"y"
,
ds
);
auto
y
=
mm
->
add_parameter
(
"y"
,
ds
);
auto
*
then_mod
=
p
.
create_module
(
"if_gpu_mod"
);
auto
*
then_mod
=
p
.
create_module
(
"if_gpu_mod"
);
std
::
vector
<
float
>
data1
=
{
0.384804
,
-
1.77948
,
-
0.453775
,
0.477438
,
-
1.06333
,
-
1.12893
};
std
::
vector
<
float
>
data1
(
ds
.
elements
(),
1
);
auto
l1
=
then_mod
->
add_literal
(
migraphx
::
literal
(
ds
,
data1
));
auto
l1
=
then_mod
->
add_literal
(
migraphx
::
literal
(
ds
,
data1
));
auto
a1
=
then_mod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
x
,
l1
);
auto
gpu_x
=
then_mod
->
add_parameter
(
"gpu_x"
,
ds
);
auto
a1
=
then_mod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
gpu_x
,
l1
);
then_mod
->
add_return
({
a1
});
then_mod
->
add_return
({
a1
});
auto
*
else_mod
=
p
.
create_module
(
"else_cpu_mod"
);
auto
*
else_mod
=
p
.
create_module
(
"else_cpu_mod"
);
std
::
vector
<
float
>
data2
=
{
-
0.258047
,
0.360394
,
0.536804
,
-
0.577762
,
1.0217
,
1.02442
};
std
::
vector
<
float
>
data2
(
ds
.
elements
(),
2
);
auto
l2
=
else_mod
->
add_literal
(
migraphx
::
literal
(
ds
,
data2
));
auto
l2
=
else_mod
->
add_literal
(
migraphx
::
literal
(
ds
,
data2
));
auto
a2
=
else_mod
->
add_instruction
(
migraphx
::
make_op
(
"mul"
),
y
,
l2
);
auto
cpu_y
=
else_mod
->
add_parameter
(
"cpu_y"
,
ds
);
auto
a2
=
else_mod
->
add_instruction
(
migraphx
::
make_op
(
"mul"
),
cpu_y
,
l2
);
else_mod
->
add_return
({
a2
});
else_mod
->
add_return
({
a2
});
auto
*
run_on_cpu_mod
=
p
.
create_module
(
"run_on_cpu"
);
auto
*
run_on_cpu_mod
=
p
.
create_module
(
"run_on_cpu"
);
auto
run_cpu_ins
=
run_on_cpu_mod
->
add_instruction
(
auto
run_cpu_ins
=
run_on_cpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
1
}}),
{},
{
else_mod
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
1
}}),
{
y
},
{
else_mod
});
run_on_cpu_mod
->
add_return
({
run_cpu_ins
});
auto
run_cpu_ins_0
=
run_on_cpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_cpu_ins
);
run_on_cpu_mod
->
add_return
({
run_cpu_ins_0
});
auto
*
run_on_gpu_mod
=
p
.
create_module
(
"run_on_gpu"
);
auto
*
run_on_gpu_mod
=
p
.
create_module
(
"run_on_gpu"
);
auto
run_gpu_ins
=
run_on_gpu_mod
->
add_instruction
(
auto
run_gpu_ins
=
run_on_gpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{},
{
then_mod
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{
x
},
{
then_mod
});
run_on_gpu_mod
->
add_return
({
run_gpu_ins
});
auto
run_gpu_ins_0
=
run_on_gpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_gpu_ins
);
run_on_gpu_mod
->
add_return
({
run_gpu_ins_0
});
auto
ret
=
auto
ret
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"if"
),
{
cond
},
{
run_on_gpu_mod
,
run_on_cpu_mod
});
mm
->
add_instruction
(
migraphx
::
make_op
(
"if"
),
{
cond
},
{
run_on_gpu_mod
,
run_on_cpu_mod
});
auto
r
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
ret
);
auto
r
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
ret
);
mm
->
add_return
({
r
});
mm
->
add_return
({
r
});
// compile
// compile
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)});
migraphx
::
compile_options
gpu_opts
;
gpu_opts
.
offload_copy
=
true
;
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)},
{
gpu_opts
});
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)}));
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
)}));
migraphx
::
parameter_map
params
;
params
[
"x"
]
=
migraphx
::
fill_argument
(
ds
,
2
);
params
[
"y"
]
=
migraphx
::
fill_argument
(
ds
,
3
);
for
(
bool
cond_val
:
{
true
,
false
})
{
params
[
"cond"
]
=
migraphx
::
argument
(
cond_s
,
&
cond_val
);
auto
result
=
p
.
eval
(
params
).
back
();
auto
gold
=
migraphx
::
fill_argument
(
ds
,
(
cond_val
?
3
:
6
));
EXPECT
(
gold
==
result
);
}
}
}
// TODO : FPGA compilation is broken right now, below test mentions fpga but doesn't compile for it
TEST_CASE
(
multitarget_compile_nested_if_then_else
)
TEST_CASE
(
multitarget_compile_nested_if_then_else
)
{
{
float
seed
=
0.0
f
;
std
::
mt19937
gen
(
seed
);
std
::
uniform_real_distribution
<>
dis
(
0.0
,
1.0
);
auto
get_random_values
=
[
&
](
size_t
elements
)
{
std
::
vector
<
float
>
rand_samples
(
elements
);
std
::
generate
(
rand_samples
.
begin
(),
rand_samples
.
end
(),
[
&
]()
{
return
dis
(
gen
);
});
return
rand_samples
;
};
std
::
unordered_map
<
std
::
size_t
,
std
::
size_t
>
counter_map
=
{{
0
,
0
},
{
1
,
0
}};
std
::
unordered_map
<
std
::
size_t
,
std
::
size_t
>
counter_map
=
{{
0
,
0
},
{
1
,
0
}};
migraphx
::
shape
ds
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
shape
ds
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
program
p
;
migraphx
::
program
p
;
auto
*
mm
=
p
.
get_main_module
();
auto
*
mm
=
p
.
get_main_module
();
migraphx
::
shape
cond_s
{
migraphx
::
shape
::
bool_type
};
migraphx
::
shape
cond_s
{
migraphx
::
shape
::
bool_type
};
auto
cond
=
mm
->
add_parameter
(
"cond"
,
cond_s
);
auto
cond_0
=
mm
->
add_parameter
(
"cond_0"
,
cond_s
);
auto
cond_1
=
mm
->
add_parameter
(
"cond_1"
,
cond_s
);
auto
x
=
mm
->
add_parameter
(
"x"
,
ds
);
auto
x
=
mm
->
add_parameter
(
"x"
,
ds
);
auto
y
=
mm
->
add_parameter
(
"y"
,
ds
);
auto
y
=
mm
->
add_parameter
(
"y"
,
ds
);
auto
z
=
mm
->
add_parameter
(
"z"
,
ds
);
auto
z
=
mm
->
add_parameter
(
"z"
,
ds
);
...
@@ -280,20 +327,22 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
...
@@ -280,20 +327,22 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
std
::
size_t
tid
)
{
std
::
size_t
tid
)
{
std
::
string
mod_name
=
std
::
string
mod_name
=
"target_"
+
std
::
to_string
(
tid
)
+
"_"
+
std
::
to_string
(
counter_map
[
tid
]
++
);
"target_"
+
std
::
to_string
(
tid
)
+
"_"
+
std
::
to_string
(
counter_map
[
tid
]
++
);
auto
*
test_mod
=
prog
.
create_module
(
mod_name
);
auto
*
test_mod
=
prog
.
create_module
(
mod_name
);
std
::
vector
<
float
>
data
=
get_random_values
(
ds
.
elements
());
std
::
vector
<
float
>
data
(
ds
.
elements
(),
-
1
);
auto
l1
=
test_mod
->
add_literal
(
migraphx
::
literal
(
ds
,
data
));
auto
l1
=
test_mod
->
add_literal
(
migraphx
::
literal
(
ds
,
data
));
auto
test_mod_param
=
test_mod
->
add_parameter
(
mod_name
,
ds
);
auto
test_mod_param_0
=
test_mod
->
add_parameter
(
mod_name
+
"_param_0"
,
ds
);
// instruction with local literal and main_mod param as inputs
auto
test_mod_param_1
=
test_mod
->
add_parameter
(
mod_name
+
"_param_1"
,
ds
);
auto
ins1
=
test_mod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
x
,
l1
);
auto
test_mod_param_2
=
test_mod
->
add_parameter
(
mod_name
+
"_param_2"
,
ds
);
// instructinon with local param and local ins as inputs
auto
ins1
=
test_mod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
test_mod_param_0
,
l1
);
auto
ins2
=
test_mod
->
add_instruction
(
migraphx
::
make_op
(
"mul"
),
ins1
,
test_mod_param
);
auto
ins2
=
test_mod
->
add_instruction
(
migraphx
::
make_op
(
"mul"
),
ins1
,
test_mod_param_1
);
// instruction with local ins and parent ins as inputs
auto
ins3
=
test_mod
->
add_instruction
(
migraphx
::
make_op
(
"sub"
),
ins2
,
test_mod_param_2
);
auto
ins3
=
test_mod
->
add_instruction
(
migraphx
::
make_op
(
"sub"
),
ins2
,
inputs
.
front
());
test_mod
->
add_return
({
ins3
});
test_mod
->
add_return
({
ins3
});
auto
*
run_on_target_mod
=
prog
.
create_module
(
"run_on_"
+
mod_name
);
auto
*
run_on_target_mod
=
prog
.
create_module
(
"run_on_"
+
mod_name
);
run_on_target_mod
->
add_instruction
(
auto
run_ins
=
run_on_target_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
tid
}}),
{
inputs
.
front
()},
{
test_mod
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
tid
}}),
inputs
,
{
test_mod
});
auto
run_ins_0
=
run_on_target_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_ins
);
run_on_target_mod
->
add_return
({
run_ins_0
});
return
run_on_target_mod
;
return
run_on_target_mod
;
};
};
...
@@ -307,15 +356,30 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
...
@@ -307,15 +356,30 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
ref_mod
->
add_return
({
ref_add
});
ref_mod
->
add_return
({
ref_add
});
auto
*
then_mod
=
p
.
create_module
(
"then_mod"
);
auto
*
then_mod
=
p
.
create_module
(
"then_mod"
);
auto
then_mod_param
=
then_mod
->
add_parameter
(
"then_mod_param"
,
ds
);
auto
then_mod_cond
=
then_mod
->
add_parameter
(
"then_mod_cond"
,
cond_s
);
auto
then_mod_ref_ins
=
then_mod
->
add_instruction
(
auto
then_mod_param_0
=
then_mod
->
add_parameter
(
"then_mod_param_0"
,
ds
);
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
3
}}),
{
then_mod_param
,
y
},
{
ref_mod
});
auto
then_mod_param_1
=
then_mod
->
add_parameter
(
"then_mod_param_1"
,
ds
);
auto
then_mod_param_2
=
then_mod
->
add_parameter
(
"then_mod_param_2"
,
ds
);
auto
then_mod_ref_ins
=
then_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
3
}}),
{
then_mod_param_0
,
then_mod_param_1
},
{
ref_mod
});
auto
then_mod_ref_ins_0
=
then_mod
->
add_instruction
(
auto
then_mod_ref_ins_0
=
then_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
then_mod_ref_ins
);
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
then_mod_ref_ins
);
then_mod
->
add_instruction
(
auto
then_mod_if
=
then_mod
->
add_instruction
(
migraphx
::
make_op
(
"if"
),
migraphx
::
make_op
(
"if"
),
{
cond
},
{
then_mod_cond
,
{
create_test_module
(
p
,
{
z
},
1
),
create_test_module
(
p
,
{
then_mod_ref_ins_0
},
0
)});
then_mod_param_0
,
then_mod_param_1
,
then_mod_param_2
,
then_mod_ref_ins_0
,
then_mod_param_1
,
then_mod_param_2
},
{
create_test_module
(
p
,
{
then_mod_param_0
,
then_mod_param_1
,
then_mod_param_2
},
1
),
create_test_module
(
p
,
{
then_mod_ref_ins_0
,
then_mod_param_1
,
then_mod_param_2
},
0
)});
auto
then_mod_if_0
=
then_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
then_mod_if
);
then_mod
->
add_return
({
then_mod_if_0
});
// create nested else_mod with multiple targets.
// create nested else_mod with multiple targets.
// else_mod has one instruction that runs a module on "fpga" and another instruction that
// else_mod has one instruction that runs a module on "fpga" and another instruction that
...
@@ -326,53 +390,105 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
...
@@ -326,53 +390,105 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
auto
fpga_add
=
fpga_mod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
fpga_x
,
fpga_y
);
auto
fpga_add
=
fpga_mod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
fpga_x
,
fpga_y
);
fpga_mod
->
add_return
({
fpga_add
});
fpga_mod
->
add_return
({
fpga_add
});
auto
*
else_mod
=
p
.
create_module
(
"else_mod"
);
auto
*
else_mod
=
p
.
create_module
(
"else_mod"
);
auto
else_mod_param
=
else_mod
->
add_parameter
(
"else_mod_param"
,
ds
);
auto
else_mod_cond
=
else_mod
->
add_parameter
(
"else_mod_cond"
,
cond_s
);
auto
else_mod_fpga_ins
=
else_mod
->
add_instruction
(
auto
else_mod_param_0
=
else_mod
->
add_parameter
(
"else_mod_param_0"
,
ds
);
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
2
}}),
{
else_mod_param
,
y
},
{
fpga_mod
});
auto
else_mod_param_1
=
else_mod
->
add_parameter
(
"else_mod_param_1"
,
ds
);
auto
else_mod_param_2
=
else_mod
->
add_parameter
(
"else_mod_param_2"
,
ds
);
auto
else_mod_fpga_ins
=
else_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
2
}}),
{
else_mod_param_0
,
else_mod_param_2
},
{
fpga_mod
});
auto
else_mod_fpga_ins_0
=
else_mod
->
add_instruction
(
auto
else_mod_fpga_ins_0
=
else_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
else_mod_fpga_ins
);
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
else_mod_fpga_ins
);
else_mod
->
add_instruction
(
migraphx
::
make_op
(
"if"
),
auto
else_mod_if
=
else_mod
->
add_instruction
(
{
cond
},
migraphx
::
make_op
(
"if"
),
{
create_test_module
(
p
,
{
else_mod_fpga_ins_0
},
0
),
{
else_mod_cond
,
create_test_module
(
p
,
{
else_mod_param
},
1
)});
else_mod_fpga_ins_0
,
else_mod_param_0
,
else_mod_param_1
,
else_mod_param_2
,
else_mod_param_1
,
else_mod_param_0
},
{
create_test_module
(
p
,
{
else_mod_fpga_ins_0
,
else_mod_param_0
,
else_mod_param_1
},
0
),
create_test_module
(
p
,
{
else_mod_param_2
,
else_mod_param_1
,
else_mod_param_0
},
1
)});
auto
else_mod_if_0
=
else_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
else_mod_if
);
else_mod
->
add_return
({
else_mod_if_0
});
// Create nested and multi-target main module using "If"
// Create nested and multi-target main module using "If"
auto
main_if_ins
=
auto
main_if_ins
=
mm
->
add_instruction
(
mm
->
add_instruction
(
migraphx
::
make_op
(
"if"
),
{
cond
,
x
},
{
then_mod
,
else_mod
});
migraphx
::
make_op
(
"if"
),
{
cond
_0
,
cond_1
,
x
,
y
,
z
,
cond_1
,
x
,
y
,
z
},
{
then_mod
,
else_mod
});
auto
r
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
main_if_ins
);
auto
r
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
main_if_ins
);
mm
->
add_return
({
r
});
mm
->
add_return
({
r
});
// compile
// compile
migraphx
::
compile_options
gpu_opts
;
gpu_opts
.
offload_copy
=
true
;
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"fpga"
),
migraphx
::
make_target
(
"ref"
),
migraphx
::
make_target
(
"ref"
)});
migraphx
::
make_target
(
"ref"
)},
{
gpu_opts
});
EXPECT
(
check_compiled_program
(
p
,
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"
fpga
"
),
migraphx
::
make_target
(
"
ref
"
),
migraphx
::
make_target
(
"ref"
)}));
migraphx
::
make_target
(
"ref"
)}));
// do evaluation using different conditions
// TODO: make two conditional to cover all the paths
migraphx
::
parameter_map
params
;
float
x_i
=
2.0
;
float
y_i
=
3.0
;
float
z_i
=
4.0
;
params
[
"x"
]
=
migraphx
::
fill_argument
(
ds
,
x_i
);
params
[
"y"
]
=
migraphx
::
fill_argument
(
ds
,
y_i
);
params
[
"z"
]
=
migraphx
::
fill_argument
(
ds
,
z_i
);
// cover all paths with different combination of conditions
std
::
vector
<
std
::
pair
<
bool
,
bool
>>
test_conds
=
{
{
true
,
true
},
{
true
,
false
},
{
false
,
true
},
{
false
,
false
}};
for
(
auto
[
cond_val_0
,
cond_val_1
]
:
test_conds
)
{
params
[
"cond_0"
]
=
migraphx
::
argument
(
cond_s
,
&
cond_val_0
);
params
[
"cond_1"
]
=
migraphx
::
argument
(
cond_s
,
&
cond_val_1
);
auto
result
=
p
.
eval
(
params
).
back
();
// main has one instruction that is : if_then_else
// then mod is doing : {tmp = x+y; (cond) ? (((x-1)*y)-z) : (((tmp-1)*y)-z);}
// else mod is doing : {tmp = x+z; (cond) ? (((tmp-1)*x)-y) : (((z-1)*y)-x);}
float
gold_i
=
-
1.0
;
if
(
cond_val_0
)
{
float
tmp_i
=
x_i
+
y_i
;
gold_i
=
(
cond_val_1
)
?
(((
x_i
-
1
)
*
y_i
)
-
z_i
)
:
(((
tmp_i
-
1
)
*
y_i
)
-
z_i
);
}
else
{
float
tmp_i
=
x_i
+
z_i
;
gold_i
=
(
cond_val_1
)
?
(((
tmp_i
-
1
)
*
x_i
)
-
y_i
)
:
(((
z_i
-
1
)
*
y_i
)
-
x_i
);
}
auto
gold
=
migraphx
::
fill_argument
(
ds
,
gold_i
);
EXPECT
(
gold
==
result
);
}
}
}
// TODO : FPGA compilation is broken right now, below test mentions fpga but doesn't compile for it
TEST_CASE
(
multitarget_select_module
)
TEST_CASE
(
multitarget_select_module
)
{
{
migraphx
::
program
p
;
migraphx
::
program
p
;
auto
*
mm
=
p
.
get_main_module
();
migraphx
::
shape
lit_s
{
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
1
}}};
auto
literal_ins
=
mm
->
add_literal
(
migraphx
::
literal
{
lit_s
,
{
6
}});
// create batch submodules
// create batch submodules
auto
create_submodule
=
[
&
](
std
::
size_t
batch_size
,
const
std
::
string
&
module_name
)
{
auto
create_submodule
=
[
&
](
std
::
size_t
batch_size
,
const
std
::
string
&
module_name
)
{
auto
*
submod
=
p
.
create_module
(
module_name
);
auto
*
submod
=
p
.
create_module
(
module_name
);
migraphx
::
shape
sm_shape
{
migraphx
::
shape
::
float_type
,
{
batch_size
,
4
}};
migraphx
::
shape
sm_shape
{
migraphx
::
shape
::
float_type
,
{
batch_size
,
4
}};
auto
sm_input
=
submod
->
add_parameter
(
"data"
,
sm_shape
);
auto
sm_input
=
submod
->
add_parameter
(
"data"
,
sm_shape
);
migraphx
::
shape
lit_s
{
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
1
}}};
auto
literal_ins
=
submod
->
add_literal
(
migraphx
::
literal
{
lit_s
,
{
6
}});
auto
broadcast_lit
=
auto
broadcast_lit
=
submod
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
),
literal_ins
,
sm_input
);
submod
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
),
literal_ins
,
sm_input
);
auto
add_ins0
=
submod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
sm_input
,
broadcast_lit
);
auto
add_ins0
=
submod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
sm_input
,
broadcast_lit
);
auto
add_ins1
=
submod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
add_ins0
,
broadcast_lit
);
auto
add_ins1
=
submod
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
add_ins0
,
broadcast_lit
);
submod
->
add_return
({
add_ins0
,
add_ins1
});
submod
->
add_return
({
add_ins1
});
return
submod
;
return
submod
;
};
};
auto
*
batch1
=
create_submodule
(
1
,
"batch_1"
);
auto
*
batch1
=
create_submodule
(
1
,
"batch_1"
);
...
@@ -380,36 +496,45 @@ TEST_CASE(multitarget_select_module)
...
@@ -380,36 +496,45 @@ TEST_CASE(multitarget_select_module)
auto
*
batch3
=
create_submodule
(
3
,
"batch_3"
);
auto
*
batch3
=
create_submodule
(
3
,
"batch_3"
);
auto
*
batch4
=
create_submodule
(
4
,
"batch_4"
);
auto
*
batch4
=
create_submodule
(
4
,
"batch_4"
);
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{{
1
,
4
},
{
4
,
4
}}};
auto
input
=
mm
->
add_parameter
(
"data"
,
s
);
auto
*
run_cpu_mod
=
p
.
create_module
(
"cpu_mod"
);
auto
*
run_cpu_mod
=
p
.
create_module
(
"cpu_mod"
);
auto
cpu_param
=
run_cpu_mod
->
add_parameter
(
auto
cpu_param
=
"cpu_
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
1
,
4
}});
run_cpu_mod
->
add_parameter
(
"
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
1
,
4
}});
auto
run_cpu_ins
=
run_cpu_mod
->
add_instruction
(
auto
run_cpu_ins
=
run_cpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
1
}}),
{
cpu_param
},
{
batch1
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
1
}}),
{
cpu_param
},
{
batch1
});
run_cpu_mod
->
add_return
({
run_cpu_ins
});
auto
run_cpu_ins_0
=
run_cpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_cpu_ins
);
run_cpu_mod
->
add_return
({
run_cpu_ins_0
});
auto
*
run_gpu_mod
=
p
.
create_module
(
"gpu_mod"
);
auto
*
run_gpu_mod
=
p
.
create_module
(
"gpu_mod"
);
auto
gpu_param
=
run_gpu_mod
->
add_parameter
(
auto
gpu_param
=
"gpu_
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
2
,
4
}});
run_gpu_mod
->
add_parameter
(
"
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
2
,
4
}});
auto
run_gpu_ins
=
run_gpu_mod
->
add_instruction
(
auto
run_gpu_ins
=
run_gpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{
gpu_param
},
{
batch2
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
0
}}),
{
gpu_param
},
{
batch2
});
run_gpu_mod
->
add_return
({
run_gpu_ins
});
auto
run_gpu_ins_0
=
run_gpu_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_gpu_ins
);
run_gpu_mod
->
add_return
({
run_gpu_ins_0
});
auto
*
run_fpga_mod
=
p
.
create_module
(
"fpga_mod"
);
auto
*
run_fpga_mod
=
p
.
create_module
(
"fpga_mod"
);
auto
fpga_param
=
run_fpga_mod
->
add_parameter
(
auto
fpga_param
=
"
fpga_data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
3
,
4
}});
run_
fpga_
mod
->
add_parameter
(
"
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
3
,
4
}});
auto
run_fpga_ins
=
run_fpga_mod
->
add_instruction
(
auto
run_fpga_ins
=
run_fpga_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
2
}}),
{
fpga_param
},
{
batch3
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
2
}}),
{
fpga_param
},
{
batch3
});
run_fpga_mod
->
add_return
({
run_fpga_ins
});
auto
run_fpga_ins_0
=
run_fpga_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_fpga_ins
);
run_fpga_mod
->
add_return
({
run_fpga_ins_0
});
auto
*
run_ref_mod
=
p
.
create_module
(
"ref_mod"
);
auto
*
run_ref_mod
=
p
.
create_module
(
"ref_mod"
);
auto
ref_param
=
run_fpga_mod
->
add_parameter
(
auto
ref_param
=
"ref_
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
4
,
4
}});
run_ref_mod
->
add_parameter
(
"
data"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
4
,
4
}});
auto
run_ref_ins
=
run_ref_mod
->
add_instruction
(
auto
run_ref_ins
=
run_ref_mod
->
add_instruction
(
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
3
}}),
{
ref_param
},
{
batch4
});
migraphx
::
make_op
(
"run_on_target"
,
{{
"target_id"
,
3
}}),
{
ref_param
},
{
batch4
});
run_ref_mod
->
add_return
({
run_ref_ins
});
auto
run_ref_ins_0
=
run_ref_mod
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
run_ref_ins
);
run_ref_mod
->
add_return
({
run_ref_ins_0
});
auto
*
mm
=
p
.
get_main_module
();
migraphx
::
shape
dyn_s
{
migraphx
::
shape
::
float_type
,
{{
1
,
4
},
{
4
,
4
}}};
auto
input
=
mm
->
add_parameter
(
"data"
,
dyn_s
);
std
::
vector
<
migraphx
::
shape
>
sub_shapes
=
{};
std
::
vector
<
migraphx
::
shape
>
sub_shapes
=
{};
sub_shapes
.
push_back
(
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{{
1
,
4
},
{
4
,
4
}}});
sub_shapes
.
push_back
(
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{{
1
,
4
},
{
4
,
4
}}});
sub_shapes
.
push_back
(
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{{
1
,
4
},
{
4
,
4
}}});
sub_shapes
.
push_back
(
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{{
1
,
4
},
{
4
,
4
}}});
...
@@ -419,18 +544,34 @@ TEST_CASE(multitarget_select_module)
...
@@ -419,18 +544,34 @@ TEST_CASE(multitarget_select_module)
{
input
},
{
input
},
{
run_cpu_mod
,
run_gpu_mod
,
run_fpga_mod
,
run_ref_mod
});
{
run_cpu_mod
,
run_gpu_mod
,
run_fpga_mod
,
run_ref_mod
});
auto
ret0
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
sm_ins
);
auto
ret0
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
0
}}),
sm_ins
);
auto
ret1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"get_tuple_elem"
,
{{
"index"
,
1
}}),
sm_ins
);
mm
->
add_return
({
ret0
});
mm
->
add_return
({
ret0
,
ret1
});
// compile
// compile
migraphx
::
compile_options
gpu_opts
;
gpu_opts
.
offload_copy
=
true
;
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
p
.
compile
({
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"fpga"
),
migraphx
::
make_target
(
"ref"
),
migraphx
::
make_target
(
"ref"
)});
migraphx
::
make_target
(
"ref"
)},
{
gpu_opts
});
EXPECT
(
check_compiled_program
(
p
,
EXPECT
(
check_compiled_program
(
p
,
{
migraphx
::
make_target
(
"gpu"
),
{
migraphx
::
make_target
(
"gpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"cpu"
),
migraphx
::
make_target
(
"
fpga
"
),
migraphx
::
make_target
(
"
ref
"
),
migraphx
::
make_target
(
"ref"
)}));
migraphx
::
make_target
(
"ref"
)}));
// program does the 12+x where x has dynamic shape {{1, 4}, {4, 4}}
for
(
const
size_t
bs
:
{
1
,
2
,
3
,
4
})
{
migraphx
::
shape
arg_shape
{
migraphx
::
shape
::
float_type
,
{
bs
,
4
}};
migraphx
::
parameter_map
params
;
params
[
"data"
]
=
migraphx
::
generate_argument
(
arg_shape
,
arg_shape
.
elements
());
std
::
vector
<
float
>
input_data
;
params
[
"data"
].
visit
([
&
](
const
auto
&
vec
)
{
input_data
.
assign
(
vec
.
begin
(),
vec
.
end
());
});
std
::
transform
(
input_data
.
begin
(),
input_data
.
end
(),
input_data
.
begin
(),
[](
const
auto
&
i
)
{
return
i
+
12.0
;
});
auto
result
=
p
.
eval
(
params
).
back
();
EXPECT
(
migraphx
::
argument
(
arg_shape
,
input_data
.
data
())
==
result
);
}
}
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/onnx/.onnxrt-commit
View file @
97d4bb6c
3be6eb53c8b359703cb645ed2cb1cdf106924b7c
d3295f4329d744fe1f8419e1220e123807282b99
test/onnx/conv_transpose_auto_pad_test.onnx
0 → 100644
View file @
97d4bb6c
conv_transpose_auto_pad_test:±
:
x
wyconv1" ConvTranspose*
auto_pad"
SAME_UPPER conv_transpose_auto_pad_testZ
x
Z
w
b
y
B
\ No newline at end of file
test/onnx/
de
conv_bias_test.onnx
→
test/onnx/conv_
transpose_
bias_test.onnx
View file @
97d4bb6c
deconv
_bias_test:
ž
conv_transpose
_bias_test:
¦
"
"
x
x
w
w
byconv1" ConvTranspose
de
conv_bias_testZ
byconv1" ConvTranspose
conv_
transpose_
bias_testZ
x
x
...
@@ -24,4 +24,4 @@
...
@@ -24,4 +24,4 @@
B
B
\ No newline at end of file
test/onnx/conv_transpose_dyn_asym_padding_test.onnx
0 → 100644
View file @
97d4bb6c
File added
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment