Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
97d4bb6c
Unverified
Commit
97d4bb6c
authored
Jul 25, 2023
by
Ted Themistokleous
Committed by
GitHub
Jul 25, 2023
Browse files
Merge branch 'develop' into add_parity_check_ci
parents
39b097c7
bdbc38bc
Changes
106
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
416 additions
and
181 deletions
+416
-181
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+1
-0
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+0
-2
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+53
-28
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+29
-7
src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
+4
-2
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+1
-1
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+27
-10
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+7
-0
src/targets/ref/lowering.cpp
src/targets/ref/lowering.cpp
+1
-1
src/tf/parse_batchnorm.cpp
src/tf/parse_batchnorm.cpp
+5
-6
src/verify_args.cpp
src/verify_args.cpp
+13
-13
test/api/test_gpu.cpp
test/api/test_gpu.cpp
+0
-1
test/gpu/codegen_literal.cpp
test/gpu/codegen_literal.cpp
+1
-1
test/gpu/manage_host_buffer.cpp
test/gpu/manage_host_buffer.cpp
+1
-1
test/gpu/quantization.cpp
test/gpu/quantization.cpp
+3
-3
test/multi_target/multitarget_test.cpp
test/multi_target/multitarget_test.cpp
+242
-101
test/onnx/.onnxrt-commit
test/onnx/.onnxrt-commit
+1
-1
test/onnx/conv_transpose_auto_pad_test.onnx
test/onnx/conv_transpose_auto_pad_test.onnx
+24
-0
test/onnx/conv_transpose_bias_test.onnx
test/onnx/conv_transpose_bias_test.onnx
+3
-3
test/onnx/conv_transpose_dyn_asym_padding_test.onnx
test/onnx/conv_transpose_dyn_asym_padding_test.onnx
+0
-0
No files found.
src/targets/gpu/fuse_mlir.cpp
View file @
97d4bb6c
...
@@ -216,6 +216,7 @@ struct find_mlir_op
...
@@ -216,6 +216,7 @@ struct find_mlir_op
"quant_dot"
,
"quant_dot"
,
"add"
,
"add"
,
"clip"
,
"clip"
,
"relu"
,
"sub"
,
"sub"
,
"mul"
,
"mul"
,
"div"
,
"div"
,
...
...
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
View file @
97d4bb6c
...
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
...
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
if
(
inputs
.
front
().
standard
())
return
inputs
.
front
();
auto
lens
=
inputs
.
at
(
0
).
lens
();
auto
lens
=
inputs
.
at
(
0
).
lens
();
auto
t
=
inputs
.
at
(
0
).
type
();
auto
t
=
inputs
.
at
(
0
).
type
();
return
{
t
,
lens
};
return
{
t
,
lens
};
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
97d4bb6c
...
@@ -31,7 +31,7 @@
...
@@ -31,7 +31,7 @@
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <unordered_map>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
...
@@ -146,7 +146,8 @@ struct miopen_convolution
...
@@ -146,7 +146,8 @@ struct miopen_convolution
void
set_conv_descriptor
()
void
set_conv_descriptor
()
{
{
cd
=
(
op
.
name
()
==
"deconvolution"
)
?
make_deconv
(
op
)
:
make_conv
(
op
);
cd
=
(
op
.
name
()
==
"convolution_backwards"
)
?
make_convolution_backwards
(
op
)
:
make_conv
(
op
);
}
}
value
compile
(
migraphx
::
context
&
ctx
,
const
shape
&
output
,
const
std
::
vector
<
shape
>&
input
)
value
compile
(
migraphx
::
context
&
ctx
,
const
shape
&
output
,
const
std
::
vector
<
shape
>&
input
)
...
@@ -162,7 +163,28 @@ struct miopen_convolution
...
@@ -162,7 +163,28 @@ struct miopen_convolution
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]),
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
std
::
size_t
workspace_size
=
0
;
std
::
size_t
workspace_size
=
0
;
auto
status
=
miopenConvolutionForwardGetWorkSpaceSize
(
miopen_stream_handle
,
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : Failed to get forward workspace size"
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
{
{
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
...
@@ -170,13 +192,34 @@ struct miopen_convolution
...
@@ -170,13 +192,34 @@ struct miopen_convolution
set_tensor_descriptor
(
miopenTensorConvolutionX
,
x_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionX
,
x_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionW
,
w_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionW
,
w_desc
,
conv_problem
);
bool
preallocate
=
false
;
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
preallocate
=
true
;
#endif
auto
x
=
preallocate
?
to_gpu
(
generate_argument
(
x_shape
))
:
inputs
[
0
];
auto
w
=
preallocate
?
to_gpu
(
generate_argument
(
w_shape
))
:
inputs
[
1
];
auto
y
=
preallocate
?
allocate_gpu
(
output_shape
)
:
inputs
[
2
];
auto
workspace
=
preallocate
?
allocate_gpu
(
workspace_shape
)
:
migraphx
::
argument
(
workspace_shape
);
set_tensor_descriptor
(
miopenTensorConvolutionY
,
y_desc
,
conv_problem
);
set_tensor_descriptor
(
miopenTensorConvolutionY
,
y_desc
,
conv_problem
);
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
const
miopenTensorArgument_t
tensor_args
[
3
]
=
{
{
miopenTensorConvolutionX
,
nullptr
,
x
.
implicit
()},
{
miopenTensorConvolutionW
,
nullptr
,
w
.
implicit
()},
{
miopenTensorConvolutionY
,
nullptr
,
y
.
implicit
()},
};
solution_ptr
=
find_solution
(
solution_ptr
=
find_solution
(
miopen_stream_handle
,
miopen_stream_handle
,
conv_problem
.
get
(),
ctx
.
get_exhaustive_tune_flag
());
3
,
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
tensor_args
,
workspace
.
implicit
(),
workspace_size
,
conv_problem
.
get
(),
ctx
.
get_exhaustive_tune_flag
());
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
...
@@ -195,29 +238,10 @@ struct miopen_convolution
...
@@ -195,29 +238,10 @@ struct miopen_convolution
return
shape
{
shape
::
int8_type
,
{
workspace_size
}};
return
shape
{
shape
::
int8_type
,
{
workspace_size
}};
}
}
#else
#else
auto
status
=
miopenConvolutionForwardGetWorkSpaceSize
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : Failed to get forward workspace size"
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
auto
x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
y
=
allocate_gpu
(
output_shape
);
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
miopenConvAlgoPerf_t
perf
;
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
...
@@ -337,6 +361,7 @@ struct miopen_convolution
...
@@ -337,6 +361,7 @@ struct miopen_convolution
return
{
s
.
type
(),
lens
,
strides
};
return
{
s
.
type
(),
lens
,
strides
};
}
}
};
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
97d4bb6c
...
@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
...
@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using
miopen_problem
=
MIGRAPHX_MANAGE_PTR
(
miopenProblem_t
,
miopenDestroyProblem
);
using
miopen_problem
=
MIGRAPHX_MANAGE_PTR
(
miopenProblem_t
,
miopenDestroyProblem
);
using
miopen_solution
=
MIGRAPHX_MANAGE_PTR
(
miopenSolution_t
,
miopenDestroySolution
);
using
miopen_solution
=
MIGRAPHX_MANAGE_PTR
(
miopenSolution_t
,
miopenDestroySolution
);
inline
miopen_solution
inline
miopen_solution
find_solution
(
miopenHandle_t
handle
,
find_solution
(
miopenHandle_t
handle
,
miopenProblem_t
problem
,
bool
tune
=
false
)
size_t
num_inputs
,
const
miopenTensorArgument_t
*
tensor_args
,
void
*
workspace
,
size_t
workspace_size
,
miopenProblem_t
problem
,
bool
tune
=
false
)
{
{
miopenSolution_t
solution
;
miopenSolution_t
solution
;
size_t
found
=
0
;
size_t
found
=
0
;
miopen_find_options
fo
=
nullptr
;
miopen_find_options
fo
=
make_obj
<
miopen_find_options
>
(
&
miopenCreateFindOptions
)
;
if
(
tune
)
if
(
tune
)
{
{
fo
=
make_obj
<
miopen_find_options
>
(
&
miopenCreateFindOptions
);
miopenSetFindOptionTuning
(
fo
.
get
(),
1
);
miopenSetFindOptionTuning
(
fo
.
get
(),
1
);
}
}
auto
status
=
miopenFindSolutions
(
handle
,
problem
,
fo
.
get
(),
&
solution
,
&
found
,
1
);
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
for
(
auto
i
:
range
(
num_inputs
))
{
auto
status
=
miopenSetFindOptionPreallocatedTensor
(
fo
.
get
(),
tensor_args
[
i
].
id
,
tensor_args
[
i
].
buffer
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen: failed to preallocate tensors for the find process"
);
}
auto
status
=
miopenSetFindOptionPreallocatedWorkspace
(
fo
.
get
(),
workspace
,
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen: failed to preallocate workspace for the find process"
);
#else
miopenStatus_t
status
;
(
void
)(
num_inputs
);
(
void
)(
tensor_args
);
(
void
)(
workspace_size
);
(
void
)(
workspace
);
#endif
status
=
miopenFindSolutions
(
handle
,
problem
,
fo
.
get
(),
&
solution
,
&
found
,
1
);
auto
result
=
miopen_solution
{
solution
};
auto
result
=
miopen_solution
{
solution
};
if
(
status
!=
miopenStatusSuccess
or
found
==
0
)
if
(
status
!=
miopenStatusSuccess
or
found
==
0
)
MIGRAPHX_THROW
(
"MIOpen miopenFindSolutions failed"
);
MIGRAPHX_THROW
(
"MIOpen
:
miopenFindSolutions failed"
);
return
result
;
return
result
;
}
}
...
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
...
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
}
}
template
<
class
T
>
template
<
class
T
>
inline
convolution_descriptor
make_
de
conv
(
const
T
&
op
)
inline
convolution_descriptor
make_conv
olution_backwards
(
const
T
&
op
)
{
{
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
miopenConvolutionMode_t
c_mode
=
miopenTranspose
;
miopenConvolutionMode_t
c_mode
=
miopenTranspose
;
...
...
src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
View file @
97d4bb6c
...
@@ -122,12 +122,14 @@ struct source_location_capture
...
@@ -122,12 +122,14 @@ struct source_location_capture
{
{
T
x
;
T
x
;
source_location
loc
;
source_location
loc
;
template
<
class
U
,
class
=
decltype
(
T
(
U
{}
))>
// declval is a workaround since default constructor for "U" is not working with rocm-5.6
template
<
class
U
>
static
U
&&
declval
();
template
<
class
U
,
class
=
decltype
(
T
(
declval
<
U
>()))
>
constexpr
source_location_capture
(
U
px
,
source_location
ploc
=
source_location
{})
constexpr
source_location_capture
(
U
px
,
source_location
ploc
=
source_location
{})
:
x
(
px
),
loc
(
ploc
)
:
x
(
px
),
loc
(
ploc
)
{
{
}
}
constexpr
operator
source_location
()
const
{
return
loc
;
}
constexpr
operator
source_location
()
const
{
return
loc
;
}
constexpr
operator
T
()
const
{
return
x
;
}
constexpr
operator
T
()
const
{
return
x
;
}
...
...
src/targets/gpu/lowering.cpp
View file @
97d4bb6c
...
@@ -106,7 +106,7 @@ struct miopen_apply
...
@@ -106,7 +106,7 @@ struct miopen_apply
add_extend_op
(
"topk"
);
add_extend_op
(
"topk"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"
de
convolution"
);
add_convolution_op
(
"convolution
_backwards
"
);
add_convolution_op
(
"quant_convolution"
);
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
...
...
src/targets/gpu/mlir.cpp
View file @
97d4bb6c
...
@@ -389,14 +389,20 @@ struct mlir_program
...
@@ -389,14 +389,20 @@ struct mlir_program
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
{
{
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
return
*
this
;
}
}
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
{
{
auto
attributes
=
prog
->
name_attributes
(
v
);
auto
attributes
=
prog
->
name_attributes
(
v
);
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
return
*
this
;
}
}
...
@@ -419,13 +425,19 @@ struct mlir_program
...
@@ -419,13 +425,19 @@ struct mlir_program
return
shape
{
r
.
type
(),
r
.
lens
()};
return
shape
{
r
.
type
(),
r
.
lens
()};
});
});
auto
x
=
prog
->
make_tensors
(
reshaped
);
auto
x
=
prog
->
make_tensors
(
reshaped
);
if
(
not
x
.
empty
())
{
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
}
return
*
this
;
return
*
this
;
}
}
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
{
if
(
not
inputs
.
empty
())
{
{
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
}
return
*
this
;
return
*
this
;
}
}
...
@@ -435,7 +447,10 @@ struct mlir_program
...
@@ -435,7 +447,10 @@ struct mlir_program
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
return
r
.
get
();
return
r
.
get
();
});
});
if
(
not
mregions
.
empty
())
{
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
}
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
// Release memory since mlir_operation owns it
// Release memory since mlir_operation owns it
for
(
auto
&
r
:
regions
)
for
(
auto
&
r
:
regions
)
...
@@ -607,12 +622,12 @@ struct mlir_program
...
@@ -607,12 +622,12 @@ struct mlir_program
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
// 1st pipeline to call
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirPassManagerRun
(
pm_front
.
get
(),
mmodule
.
get
());
mlirPassManagerRun
OnOp
(
pm_front
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
())
)
;
// 2nd pipeline to call
// 2nd pipeline to call
get_module_tuned
();
get_module_tuned
();
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirPassManagerRun
(
pm_back
.
get
(),
mmodule
.
get
());
mlirPassManagerRun
OnOp
(
pm_back
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
())
)
;
code_object_op
op
{};
code_object_op
op
{};
op
.
symbol_name
=
sym_name
;
op
.
symbol_name
=
sym_name
;
...
@@ -701,6 +716,11 @@ struct mlir_program
...
@@ -701,6 +716,11 @@ struct mlir_program
bool
get_module_tuned
()
const
bool
get_module_tuned
()
const
{
{
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
// The tuning table as currently implemented is currently not
// thread safe. This will be fixed in the future. For now,
// stick a mutex around all tuning table interaction.
static
std
::
mutex
lock
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
lock
);
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
{
{
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
...
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
...
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
{
{
adjust_param_shapes
(
m
,
inputs
);
adjust_param_shapes
(
m
,
inputs
);
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
// set mutex while llvm thread support is disabled.
static
std
::
mutex
g_mlirc_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_mlirc_mutex
);
if
(
trace
)
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
std
::
cout
<<
m
<<
std
::
endl
;
...
...
src/targets/gpu/rocblas.cpp
View file @
97d4bb6c
...
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
...
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
bool
get_int8_x4_format
(
context
&
ctx
)
bool
get_int8_x4_format
(
context
&
ctx
)
{
{
#if ROCBLAS_VERSION_MAJOR >= 3
(
void
)(
ctx
);
return
false
;
#else
// int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
// v3.0 and will be removed in v4.0
rocblas_gemm_flags
flag
;
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
#endif
}
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/ref/lowering.cpp
View file @
97d4bb6c
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/quant_dot.hpp>
...
...
src/tf/parse_batchnorm.cpp
View file @
97d4bb6c
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
// unsqueeze tensors of shape (C) to broadcast correctly
// unsqueeze tensors of shape (C) to broadcast correctly
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
scale_unsqueeze
=
auto
scale_unsqueeze
=
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
var_unsqueeze
=
auto
var_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
auto
numer
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
x_sub_mean
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
denom
=
info
.
add_
broadcastable_binary_op
(
"pow"
,
var_eps
,
rt
);
auto
rsqrt
=
info
.
add_
instruction
(
make_op
(
"rsqrt"
)
,
var_eps
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"
div"
,
numer
,
denom
);
auto
mul0
=
info
.
add_broadcastable_binary_op
(
"
mul"
,
scale_unsqueeze
,
rsqrt
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
scale_unsqueeze
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
x_sub_mean
,
mul0
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
}
}
};
};
...
...
src/verify_args.cpp
View file @
97d4bb6c
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
bool
passed
=
true
;
bool
passed
=
true
;
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
double
error
;
double
error
;
passed
=
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
passed
=
verify
::
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
if
(
not
passed
)
if
(
not
passed
)
{
{
// TODO: Check for nans
// TODO: Check for nans
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
if
(
target
.
size
()
<
32
)
if
(
target
.
size
()
<
32
)
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
auto
mxdiff
=
max_diff
(
ref
,
target
);
auto
mxdiff
=
verify
::
max_diff
(
ref
,
target
);
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
auto
idx
=
mismatch_idx
(
ref
,
target
,
float_equal
);
auto
idx
=
verify
::
mismatch_idx
(
ref
,
target
,
float_equal
);
if
(
idx
<
range_distance
(
ref
))
if
(
idx
<
verify
::
range_distance
(
ref
))
{
{
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
<<
std
::
endl
;
<<
std
::
endl
;
}
}
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
}
}
else
else
{
{
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
// auto mxdiff = max_diff(ref, target);
// auto mxdiff = max_diff(ref, target);
// std::cout << "Max diff: " << mxdiff << std::endl;
// std::cout << "Max diff: " << mxdiff << std::endl;
// auto idx = mismatch_idx(ref, target, float_equal);
// auto idx = mismatch_idx(ref, target, float_equal);
// if(idx < range_distance(ref))
// if(idx <
verify::
range_distance(ref))
// {
// {
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// << std::endl;
// << std::endl;
// }
// }
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
...
test/api/test_gpu.cpp
View file @
97d4bb6c
...
@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
...
@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
auto
shapes_before
=
p
.
get_output_shapes
();
auto
shapes_before
=
p
.
get_output_shapes
();
migraphx
::
compile_options
options
;
migraphx
::
compile_options
options
;
options
.
set_offload_copy
();
options
.
set_offload_copy
();
options
.
set_exhaustive_tune_flag
();
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
auto
shapes_after
=
p
.
get_output_shapes
();
auto
shapes_after
=
p
.
get_output_shapes
();
CHECK
(
shapes_before
.
size
()
==
1
);
CHECK
(
shapes_before
.
size
()
==
1
);
...
...
test/gpu/codegen_literal.cpp
View file @
97d4bb6c
...
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
...
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
migraphx
::
target
gpu_t
=
migraphx
::
make_target
(
"gpu"
);
migraphx
::
target
gpu_t
=
migraphx
::
make_target
(
"gpu"
);
run_prog
(
p
,
gpu_t
,
m
,
gpu_result
);
run_prog
(
p
,
gpu_t
,
m
,
gpu_result
);
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
ref_result
,
gpu_result
));
}
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/gpu/manage_host_buffer.cpp
View file @
97d4bb6c
...
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
...
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
auto
result
=
p
.
eval
(
pp
).
back
();
auto
result
=
p
.
eval
(
pp
).
back
();
std
::
vector
<
float
>
results_vector
(
ss
.
elements
(),
-
1
);
std
::
vector
<
float
>
results_vector
(
ss
.
elements
(),
-
1
);
result
.
visit
([
&
](
auto
output
)
{
results_vector
.
assign
(
output
.
begin
(),
output
.
end
());
});
result
.
visit
([
&
](
auto
output
)
{
results_vector
.
assign
(
output
.
begin
(),
output
.
end
());
});
EXPECT
(
migraphx
::
verify_range
(
c_vec
,
results_vector
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
c_vec
,
results_vector
));
}
}
TEST_CASE
(
arguments_lifetime
)
TEST_CASE
(
arguments_lifetime
)
...
...
test/gpu/quantization.cpp
View file @
97d4bb6c
...
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
...
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
std
::
vector
<
int8_t
>
val_final
;
std
::
vector
<
int8_t
>
val_final
;
ref_arg_final
.
visit
([
&
](
auto
v
)
{
val_final
.
assign
(
v
.
begin
(),
v
.
end
());
});
ref_arg_final
.
visit
([
&
](
auto
v
)
{
val_final
.
assign
(
v
.
begin
(),
v
.
end
());
});
EXPECT
(
migraphx
::
verify_range
(
val_orig
,
val_final
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
val_orig
,
val_final
));
}
}
TEST_CASE
(
int8_quantization
)
TEST_CASE
(
int8_quantization
)
...
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
...
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
// the regular pipeline uses the rewrite_quantization in the much
// the regular pipeline uses the rewrite_quantization in the much
// earlier stage.
// earlier stage.
if
(
migraphx
::
gpu
::
mlir_enabled
())
if
(
migraphx
::
gpu
::
mlir_enabled
())
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
,
1e5
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
ref_result
,
gpu_result
,
1e5
));
else
else
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
));
EXPECT
(
migraphx
::
verify
::
verify
_range
(
ref_result
,
gpu_result
));
}
}
}
}
...
...
test/multi_target/multitarget_test.cpp
View file @
97d4bb6c
This diff is collapsed.
Click to expand it.
test/onnx/.onnxrt-commit
View file @
97d4bb6c
3be6eb53c8b359703cb645ed2cb1cdf106924b7c
d3295f4329d744fe1f8419e1220e123807282b99
test/onnx/conv_transpose_auto_pad_test.onnx
0 → 100644
View file @
97d4bb6c
conv_transpose_auto_pad_test:±
:
x
wyconv1" ConvTranspose*
auto_pad"
SAME_UPPER conv_transpose_auto_pad_testZ
x
Z
w
b
y
B
\ No newline at end of file
test/onnx/
de
conv_bias_test.onnx
→
test/onnx/conv_
transpose_
bias_test.onnx
View file @
97d4bb6c
deconv
_bias_test:
ž
conv_transpose
_bias_test:
¦
"
"
x
x
w
w
byconv1" ConvTranspose
de
conv_bias_testZ
byconv1" ConvTranspose
conv_
transpose_
bias_testZ
x
x
...
@@ -24,4 +24,4 @@
...
@@ -24,4 +24,4 @@
B
B
\ No newline at end of file
test/onnx/conv_transpose_dyn_asym_padding_test.onnx
0 → 100644
View file @
97d4bb6c
File added
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment