Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
23cb7917
"tests/data/streamResult.log" did not exist on "62a2913497a866754ae96d57ef445d8cec6e89b2"
Unverified
Commit
23cb7917
authored
Aug 16, 2023
by
Brian Pickrell
Committed by
GitHub
Aug 16, 2023
Browse files
Merge branch 'develop' into blas_tuning
parents
b5fcc0bc
ea32ca70
Changes
458
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
377 additions
and
160 deletions
+377
-160
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
...gets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
+11
-3
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
+1
-1
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+18
-12
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+216
-72
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+8
-16
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+17
-8
src/targets/gpu/time_op.cpp
src/targets/gpu/time_op.cpp
+2
-4
src/targets/ref/CMakeLists.txt
src/targets/ref/CMakeLists.txt
+2
-0
src/targets/ref/include/migraphx/ref/context.hpp
src/targets/ref/include/migraphx/ref/context.hpp
+1
-0
src/targets/ref/include/migraphx/ref/lowering.hpp
src/targets/ref/include/migraphx/ref/lowering.hpp
+2
-2
src/targets/ref/include/migraphx/ref/target.hpp
src/targets/ref/include/migraphx/ref/target.hpp
+1
-1
src/targets/ref/lowering.cpp
src/targets/ref/lowering.cpp
+1
-1
src/tf/CMakeLists.txt
src/tf/CMakeLists.txt
+2
-1
src/tf/op_parser.cpp
src/tf/op_parser.cpp
+1
-0
src/tf/parse_batchnorm.cpp
src/tf/parse_batchnorm.cpp
+5
-6
src/tf/tf.cpp
src/tf/tf.cpp
+3
-0
src/tf/tf_parser.cpp
src/tf/tf_parser.cpp
+1
-1
src/value.cpp
src/value.cpp
+34
-1
src/verify_args.cpp
src/verify_args.cpp
+13
-13
test/CMakeLists.txt
test/CMakeLists.txt
+38
-18
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
View file @
23cb7917
...
@@ -218,7 +218,15 @@ using common_type_t = typename common_type<Ts...>::type;
...
@@ -218,7 +218,15 @@ using common_type_t = typename common_type<Ts...>::type;
#define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>
#define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>
constexpr
unsigned
long
int_max
(
unsigned
long
n
)
{
return
(
1u
<<
(
n
*
8
))
-
1
;
}
constexpr
unsigned
long
int_max
(
unsigned
long
n
)
{
// Note, left shift cannot be used to get the maximum value of int64_type or
// uint64_type because it is undefined behavior to left shift 64 bits for
// these types
if
(
n
==
sizeof
(
int64_t
))
return
-
1
;
return
(
1ul
<<
(
n
*
8
))
-
1
;
}
template
<
class
T
,
template
<
class
T
,
MIGRAPHX_REQUIRES
(
is_integral
<
T
>{}
or
is_floating_point
<
T
>
{}
or
MIGRAPHX_REQUIRES
(
is_integral
<
T
>{}
or
is_floating_point
<
T
>
{}
or
...
@@ -228,9 +236,9 @@ constexpr T numeric_max()
...
@@ -228,9 +236,9 @@ constexpr T numeric_max()
if
constexpr
(
is_integral
<
T
>
{})
if
constexpr
(
is_integral
<
T
>
{})
{
{
if
constexpr
(
is_unsigned
<
T
>
{})
if
constexpr
(
is_unsigned
<
T
>
{})
return
int_max
(
sizeof
(
T
))
*
2
;
else
return
int_max
(
sizeof
(
T
));
return
int_max
(
sizeof
(
T
));
else
return
int_max
(
sizeof
(
T
))
/
2
;
}
}
else
if
constexpr
(
is_same
<
T
,
double
>
{})
else
if
constexpr
(
is_same
<
T
,
double
>
{})
return
__DBL_MAX__
;
return
__DBL_MAX__
;
...
...
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
View file @
23cb7917
...
@@ -135,7 +135,7 @@ constexpr vec<vec_type<T>, N> vec_packed_at(T x, I i)
...
@@ -135,7 +135,7 @@ constexpr vec<vec_type<T>, N> vec_packed_at(T x, I i)
return
vec
<
T
,
N
>
{
x
};
return
vec
<
T
,
N
>
{
x
};
else
else
{
{
MIGRAPHX_ASSERT
((
i
+
N
)
<
vec_size
<
T
>
());
MIGRAPHX_ASSERT
((
i
+
N
)
<
=
vec_size
<
T
>
());
vec
<
vec_type
<
T
>
,
N
>
result
=
{
0
};
vec
<
vec_type
<
T
>
,
N
>
result
=
{
0
};
for
(
int
j
=
0
;
j
<
N
;
j
++
)
for
(
int
j
=
0
;
j
<
N
;
j
++
)
{
{
...
...
src/targets/gpu/lowering.cpp
View file @
23cb7917
...
@@ -22,12 +22,19 @@
...
@@ -22,12 +22,19 @@
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <iterator>
#include <iterator>
#include <migraphx/gpu/lowering.hpp>
#include <utility>
#include <functional>
#include <algorithm>
#include <map>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/program.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/if_op.hpp>
...
@@ -35,17 +42,12 @@
...
@@ -35,17 +42,12 @@
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/program.hpp>
#include <utility>
#include <functional>
#include <algorithm>
#include <map>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -53,8 +55,9 @@ namespace gpu {
...
@@ -53,8 +55,9 @@ namespace gpu {
struct
miopen_apply
struct
miopen_apply
{
{
module
*
mod
=
nullptr
;
module
*
mod
=
nullptr
;
const
lowering
*
pass
=
nullptr
;
module_pass_manager
*
mpm
=
nullptr
;
const
lowering
*
pass
=
nullptr
;
std
::
unordered_map
<
std
::
string
,
std
::
function
<
instruction_ref
(
instruction_ref
)
>>
apply_map
{};
std
::
unordered_map
<
std
::
string
,
std
::
function
<
instruction_ref
(
instruction_ref
)
>>
apply_map
{};
instruction_ref
last
{};
instruction_ref
last
{};
bool
offload_copy
=
false
;
bool
offload_copy
=
false
;
...
@@ -83,7 +86,7 @@ struct miopen_apply
...
@@ -83,7 +86,7 @@ struct miopen_apply
auto
&
ctx
=
get_context
();
auto
&
ctx
=
get_context
();
int8_x4_format
=
get_int8_x4_format
(
ctx
);
int8_x4_format
=
get_int8_x4_format
(
ctx
);
compute_fp32
=
get_compute_fp32_flag
();
compute_fp32
=
get_compute_fp32_flag
();
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
offload_copy
=
(
mod
==
mpm
->
get_root_module
()
)
?
pass
->
offload_copy
:
false
;
add_generic_op
(
"contiguous"
);
add_generic_op
(
"contiguous"
);
...
@@ -103,7 +106,7 @@ struct miopen_apply
...
@@ -103,7 +106,7 @@ struct miopen_apply
add_extend_op
(
"topk"
);
add_extend_op
(
"topk"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"
de
convolution"
);
add_convolution_op
(
"convolution
_backwards
"
);
add_convolution_op
(
"quant_convolution"
);
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
...
@@ -375,7 +378,10 @@ struct miopen_apply
...
@@ -375,7 +378,10 @@ struct miopen_apply
}
}
};
};
void
lowering
::
apply
(
module
&
m
)
const
{
miopen_apply
{
&
m
,
this
}.
apply
();
}
void
lowering
::
apply
(
module_pass_manager
&
mpm
)
const
{
miopen_apply
{
&
mpm
.
get_module
(),
&
mpm
,
this
}.
apply
();
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/mlir.cpp
View file @
23cb7917
...
@@ -36,7 +36,10 @@
...
@@ -36,7 +36,10 @@
#include <mutex>
#include <mutex>
#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
#warning "Incompatible version of rocMLIR library used, disabling"
#warning "Incompatible version of rocMLIR library used, disabling"
// Only undefine when not using cppcheck
#ifndef CPPCHECK
#undef MIGRAPHX_MLIR
#undef MIGRAPHX_MLIR
#endif
#else
#else
#include <mlir-c/RegisterRocMLIR.h>
#include <mlir-c/RegisterRocMLIR.h>
#endif
#endif
...
@@ -50,8 +53,10 @@
...
@@ -50,8 +53,10 @@
#include <migraphx/ranges.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/gpu/tuning_config.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/permutation.hpp>
#include <deque>
#include <deque>
...
@@ -121,7 +126,10 @@ struct mlir_handle
...
@@ -121,7 +126,10 @@ struct mlir_handle
#define MIGRAPHX_MANAGE_MLIR_HANDLE(T, F) migraphx::gpu::mlir_handle<T, decltype(&F), &F> // NOLINT
#define MIGRAPHX_MANAGE_MLIR_HANDLE(T, F) migraphx::gpu::mlir_handle<T, decltype(&F), &F> // NOLINT
using
mlir_context
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirContext
,
mlirContextDestroy
);
using
mlir_context
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirContext
,
mlirContextDestroy
);
using
mlir_thread_pool
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirLlvmThreadPool
,
mlirLlvmThreadPoolDestroy
);
using
mlir_dialect_registry
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirDialectRegistry
,
mlirDialectRegistryDestroy
);
using
mlir_module
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirModule
,
mlirModuleDestroy
);
using
mlir_module
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirModule
,
mlirModuleDestroy
);
using
mlir_operation
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirOperation
,
mlirOperationDestroy
);
using
mlir_operation
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirOperation
,
mlirOperationDestroy
);
using
mlir_op_printing_flags
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirOpPrintingFlags
,
using
mlir_op_printing_flags
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirOpPrintingFlags
,
...
@@ -131,6 +139,10 @@ using mlir_block = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirBlock, mlirBlockD
...
@@ -131,6 +139,10 @@ using mlir_block = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirBlock, mlirBlockD
using
mlir_pass_manager
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirPassManager
,
mlirPassManagerDestroy
);
using
mlir_pass_manager
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirPassManager
,
mlirPassManagerDestroy
);
using
mlir_tuning_table
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningTable
,
using
mlir_tuning_table
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningTable
,
mlirRockTuningTableDestroy
);
mlirRockTuningTableDestroy
);
using
mlir_tuning_space
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningSpace
,
mlirRockTuningSpaceDestroy
);
using
mlir_tuning_param
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningParam
,
mlirRockTuningParamDestroy
);
std
::
string_view
to_string_view
(
MlirStringRef
s
)
{
return
{
s
.
data
,
s
.
length
};
}
std
::
string_view
to_string_view
(
MlirStringRef
s
)
{
return
{
s
.
data
,
s
.
length
};
}
...
@@ -164,25 +176,41 @@ std::string mlir_print(F f, T x)
...
@@ -164,25 +176,41 @@ std::string mlir_print(F f, T x)
return
ss
.
str
();
return
ss
.
str
();
}
}
const
std
::
unordered_set
<
std
::
string
>&
get_xdlops_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
struct
mlir_program
struct
mlir_program
{
{
mlir_program
()
mlir_program
()
:
ctx
(
mlirContextCreate
()),
:
ctx
(
mlirContextCreateWithRegistry
(
get_dialect_registry
().
get
(),
/*threadingEnable=*/
false
)),
location
(
mlirLocationUnknownGet
(
ctx
.
get
())),
location
(
mlirLocationUnknownGet
(
ctx
.
get
())),
mmodule
(
mlirModuleCreateEmpty
(
location
))
mmodule
(
mlirModuleCreateEmpty
(
location
))
{
{
MlirDialectRegistry
registry
=
mlirDialectRegistryCreate
();
mlirContextSetThreadPool
(
ctx
.
get
(),
get_thread_pool
().
get
());
mlirRegisterRocMLIRDialects
(
registry
);
mlirContextAppendDialectRegistry
(
ctx
.
get
(),
registry
);
mlirContextLoadAllAvailableDialects
(
ctx
.
get
());
mlirContextLoadAllAvailableDialects
(
ctx
.
get
());
mlirDialectRegistryDestroy
(
registry
);
}
mlirContextSetAllowUnregisteredDialects
(
ctx
.
get
(),
true
/*allow*/
);
static
mlir_dialect_registry
&
get_dialect_registry
()
{
static
std
::
once_flag
init_guard
;
static
mlir_dialect_registry
the_registry
;
// The MLIR registration functions (for dialects and passes) are not
// necessarily thread-safe and need to be executed exactly once
// (especially since they eventually call non-thread-safe LLVM
// initilizations).
std
::
call_once
(
init_guard
,
[
&
]()
{
the_registry
=
mlirDialectRegistryCreate
();
mlirRegisterRocMLIRDialects
(
the_registry
.
get
());
mlirRegisterRocMLIRPasses
();
});
return
the_registry
;
}
static
mlir_thread_pool
&
get_thread_pool
()
{
// To save on overhead, we create one LLVM thread pool and reuse it
// across all MLIR contexts as recommended by MLIR upstream.
// Note that this is thread-safe as of C++11.
static
mlir_thread_pool
the_pool
=
mlirLlvmThreadPoolCreate
();
return
the_pool
;
}
}
MlirType
make_type
(
shape
::
type_t
t
)
const
MlirType
make_type
(
shape
::
type_t
t
)
const
...
@@ -244,8 +272,6 @@ struct mlir_program
...
@@ -244,8 +272,6 @@ struct mlir_program
MlirAttribute
attribute
(
std
::
int64_t
i
)
const
MlirAttribute
attribute
(
std
::
int64_t
i
)
const
{
{
if
(
i
<
0
)
MIGRAPHX_THROW
(
"MLIR cant handle negative values since they are ambiguous"
);
return
mlirIntegerAttrGet
(
mlirIntegerTypeGet
(
ctx
.
get
(),
64
),
i
);
return
mlirIntegerAttrGet
(
mlirIntegerTypeGet
(
ctx
.
get
(),
64
),
i
);
}
}
MlirAttribute
attribute
(
std
::
uint64_t
i
)
const
MlirAttribute
attribute
(
std
::
uint64_t
i
)
const
...
@@ -324,7 +350,8 @@ struct mlir_program
...
@@ -324,7 +350,8 @@ struct mlir_program
std
::
string
,
std
::
string
,
value
,
value
,
std
::
vector
<
value
>
,
std
::
vector
<
value
>
,
MlirType
>
;
MlirType
,
MlirAttribute
>
;
using
named_attribute_t
=
std
::
pair
<
std
::
string_view
,
attribute_t
>
;
using
named_attribute_t
=
std
::
pair
<
std
::
string_view
,
attribute_t
>
;
MlirNamedAttribute
name_attribute
(
const
named_attribute_t
&
na
)
const
MlirNamedAttribute
name_attribute
(
const
named_attribute_t
&
na
)
const
...
@@ -365,14 +392,20 @@ struct mlir_program
...
@@ -365,14 +392,20 @@ struct mlir_program
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
{
{
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
return
*
this
;
}
}
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
{
{
auto
attributes
=
prog
->
name_attributes
(
v
);
auto
attributes
=
prog
->
name_attributes
(
v
);
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
return
*
this
;
}
}
...
@@ -395,13 +428,19 @@ struct mlir_program
...
@@ -395,13 +428,19 @@ struct mlir_program
return
shape
{
r
.
type
(),
r
.
lens
()};
return
shape
{
r
.
type
(),
r
.
lens
()};
});
});
auto
x
=
prog
->
make_tensors
(
reshaped
);
auto
x
=
prog
->
make_tensors
(
reshaped
);
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
if
(
not
x
.
empty
())
{
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
}
return
*
this
;
return
*
this
;
}
}
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
{
{
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
if
(
not
inputs
.
empty
())
{
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
}
return
*
this
;
return
*
this
;
}
}
...
@@ -411,7 +450,10 @@ struct mlir_program
...
@@ -411,7 +450,10 @@ struct mlir_program
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
return
r
.
get
();
return
r
.
get
();
});
});
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
if
(
not
mregions
.
empty
())
{
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
}
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
// Release memory since mlir_operation owns it
// Release memory since mlir_operation owns it
for
(
auto
&
r
:
regions
)
for
(
auto
&
r
:
regions
)
...
@@ -468,7 +510,8 @@ struct mlir_program
...
@@ -468,7 +510,8 @@ struct mlir_program
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
{
"sym_name"
,
sym_name
},
{
"sym_name"
,
sym_name
},
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"arch"
,
target_arch
}});
{
"arch"
,
target_arch
},
{
"num_cu"
,
num_cu
}});
ops
.
add_region
(
std
::
move
(
region
));
ops
.
add_region
(
std
::
move
(
region
));
insert
(
body
,
std
::
move
(
ops
));
insert
(
body
,
std
::
move
(
ops
));
...
@@ -481,6 +524,10 @@ struct mlir_program
...
@@ -481,6 +524,10 @@ struct mlir_program
{
{
if
(
ins
->
name
()
==
"@return"
)
if
(
ins
->
name
()
==
"@return"
)
return
"func.return"
;
return
"func.return"
;
if
(
ins
->
name
()
==
"@literal"
)
{
return
"tosa.const"
;
}
return
"migraphx."
+
ins
->
name
();
return
"migraphx."
+
ins
->
name
();
}
}
...
@@ -511,14 +558,7 @@ struct mlir_program
...
@@ -511,14 +558,7 @@ struct mlir_program
static
std
::
string
get_symbol_name
(
const
module
&
m
)
static
std
::
string
get_symbol_name
(
const
module
&
m
)
{
{
for
(
auto
ins
:
iterator_for
(
m
))
return
"mlir_"
+
gen
::
generate_name_from_ops
(
m
);
{
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
return
"mlir_"
+
ins
->
name
();
}
}
return
"main"
;
}
}
void
parse
(
const
module
&
m
)
void
parse
(
const
module
&
m
)
...
@@ -532,20 +572,28 @@ struct mlir_program
...
@@ -532,20 +572,28 @@ struct mlir_program
{
{
if
(
ins
->
name
()
==
"@param"
)
if
(
ins
->
name
()
==
"@param"
)
continue
;
continue
;
if
(
ins
->
name
()
==
"contiguous"
)
{
ins_map
[
ins
]
=
ins_map
[
ins
->
inputs
().
at
(
0
)];
continue
;
}
auto
name
=
get_name
(
ins
);
auto
name
=
get_name
(
ins
);
auto
ops
=
create_operation_state
(
name
);
auto
ops
=
create_operation_state
(
name
);
ops
.
add_attribute_value
(
get_operator_value
(
ins
->
get_operator
()));
ops
.
add_attribute_value
(
get_operator_value
(
ins
->
get_operator
()));
if
(
ins
->
name
()
!=
"@return"
)
if
(
ins
->
name
()
!=
"@return"
)
ops
.
add_results
({
get_shape
(
ins
)});
ops
.
add_results
({
get_shape
(
ins
)});
if
(
ins
->
name
()
==
"@literal"
)
{
literal
r
=
ins
->
get_literal
();
MlirType
tensor_type
=
make_tensor
(
ins
->
get_shape
());
MlirAttribute
mlir_value_attr
=
mlirDenseElementsAttrRawBufferGet
(
tensor_type
,
r
.
get_shape
().
bytes
(),
r
.
data
());
ops
.
add_attributes
({{
"value"
,
mlir_value_attr
}});
}
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
{
pp
=
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
// check if HW supports xdlops
auto
target_chip
=
trim
(
split_string
(
target_arch
,
':'
).
front
());
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_chip
);
if
(
xdlops
)
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
}
std
::
vector
<
MlirValue
>
inputs
;
std
::
vector
<
MlirValue
>
inputs
;
...
@@ -562,18 +610,30 @@ struct mlir_program
...
@@ -562,18 +610,30 @@ struct mlir_program
}
}
}
}
code_object_op
compil
e
()
MIGRAPHX_TIDY_CONST
void
run_high_level_pipelin
e
()
MIGRAPHX_TIDY_CONST
{
{
mlir_pass_manager
pm_front
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_front
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirPassManagerRun
(
pm_front
.
get
(),
mmodule
.
get
());
mlirPassManagerRunOnOp
(
pm_front
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
()));
}
// 2nd pipeline to call
void
run_backend_pipeline
()
MIGRAPHX_TIDY_CONST
get_module_tuned
();
{
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirPassManagerRun
(
pm_back
.
get
(),
mmodule
.
get
());
mlirPassManagerRunOnOp
(
pm_back
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
()));
}
code_object_op
compile
(
const
value
&
solution
)
MIGRAPHX_TIDY_CONST
{
// 1st pipeline to call
run_high_level_pipeline
();
if
(
solution
.
is_null
())
get_module_tuned
();
else
set_tuning
(
solution
);
// 2nd pipeline to call
run_backend_pipeline
();
code_object_op
op
{};
code_object_op
op
{};
op
.
symbol_name
=
sym_name
;
op
.
symbol_name
=
sym_name
;
...
@@ -582,7 +642,12 @@ struct mlir_program
...
@@ -582,7 +642,12 @@ struct mlir_program
return
op
;
return
op
;
}
}
void
find_target
()
{
target_arch
=
get_device_name
();
}
void
set_gpu_properties
(
const
context
&
migraphx_ctx
)
{
const
auto
&
device
=
migraphx_ctx
.
get_current_device
();
target_arch
=
device
.
get_device_name
();
num_cu
=
device
.
get_cu_count
();
}
std
::
pair
<
std
::
size_t
,
std
::
size_t
>
get_launch_params
()
const
std
::
pair
<
std
::
size_t
,
std
::
size_t
>
get_launch_params
()
const
{
{
...
@@ -596,7 +661,7 @@ struct mlir_program
...
@@ -596,7 +661,7 @@ struct mlir_program
value
::
binary
get_binary
()
const
value
::
binary
get_binary
()
const
{
{
in
t
size
=
0
;
size_
t
size
=
0
;
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
nullptr
);
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
nullptr
);
value
::
binary
result
(
size
);
value
::
binary
result
(
size
);
if
(
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
reinterpret_cast
<
char
*>
(
result
.
data
())))
if
(
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
reinterpret_cast
<
char
*>
(
result
.
data
())))
...
@@ -604,14 +669,52 @@ struct mlir_program
...
@@ -604,14 +669,52 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
}
void
set_tuning
(
const
value
&
v
)
MIGRAPHX_TIDY_CONST
{
const
auto
*
str
=
v
.
if_string
();
if
(
str
==
nullptr
)
MIGRAPHX_THROW
(
"mlir tuning solutions must be strings"
);
if
(
not
mlirRockTuningSetFromStr
(
mmodule
.
get
(),
make_mlir_string_ref
(
*
str
)))
MIGRAPHX_THROW
(
"Failed setting tuning key: "
+
*
str
);
}
tuning_config
get_tuning_config
()
MIGRAPHX_TIDY_CONST
{
tuning_config
tc
;
run_high_level_pipeline
();
mlir_tuning_space
params
{
mlirRockTuningSpaceCreate
(
mmodule
.
get
(),
RocmlirTuningParamSetKindFull
)};
for
(
auto
i
:
range
(
mlirRockTuningGetNumParams
(
params
.
get
())))
{
mlir_tuning_param
param
{
mlirRockTuningParamCreate
()};
if
(
not
mlirRockTuningParamGet
(
params
.
get
(),
i
,
param
.
get
()))
MIGRAPHX_THROW
(
"Incorrect mlir tuning parameter: "
+
std
::
to_string
(
i
));
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
perf_key
;
size_t
perf_key_bytes
=
mlirRockTuningParamToString
(
param
.
get
(),
perf_key
.
data
(),
perf_key
.
size
());
if
(
perf_key_bytes
>
perf_key
.
size
())
MIGRAPHX_THROW
(
"Tuning perf key was "
+
std
::
to_string
(
perf_key_bytes
)
+
" bytes and thus too long"
);
tc
.
solutions
.
emplace_back
(
perf_key
.
begin
(),
perf_key
.
begin
()
+
perf_key_bytes
);
}
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
tuning_key
;
size_t
tuning_key_bytes
=
mlirRockTuningGetKey
(
mmodule
.
get
(),
tuning_key
.
data
(),
tuning_key
.
size
());
if
(
tuning_key_bytes
>
tuning_key
.
size
())
MIGRAPHX_THROW
(
"Tuning table key was "
+
std
::
to_string
(
tuning_key_bytes
)
+
" bytes and thus too long"
);
tc
.
problem
=
std
::
string
(
tuning_key
.
begin
(),
tuning_key
.
begin
()
+
tuning_key_bytes
);
return
tc
;
}
std
::
string
get_tune_params
(
bool
xdlops
)
const
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
std
::
string
get_tune_params
(
bool
xdlops
)
const
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
// This function appends to tuning cfg file that could be
// This function appends to tuning cfg file that could be
// used with rocMLIR tuning scripts.
// used with rocMLIR tuning scripts.
void
dump_tuning_cfg
(
const
char
*
prob_config
)
const
void
dump_tuning_cfg
(
const
std
::
string
&
prob_config
)
const
{
{
std
::
string
tuning_cfg_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_CFG
{});
std
::
string
tuning_cfg_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_CFG
{});
if
(
!
tuning_cfg_path
.
empty
())
if
(
not
tuning_cfg_path
.
empty
())
{
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
prob_config
,
'\t'
);
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
prob_config
,
'\t'
);
std
::
string
prob
=
tokens
[
1
];
std
::
string
prob
=
tokens
[
1
];
...
@@ -628,46 +731,66 @@ struct mlir_program
...
@@ -628,46 +731,66 @@ struct mlir_program
}
}
}
}
static
mlir_tuning_table
create
_tuning_table
()
static
std
::
pair
<
mlir_tuning_table
,
bool
>
load
_tuning_table
()
{
{
mlir_tuning_table
tuning_table
{
mlirRockTuningTableCreate
()};
mlir_tuning_table
tuning_table
{
mlirRockTuningTableCreate
()};
bool
found_table
=
false
;
std
::
string
tuning_db_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_DB
{});
std
::
string
tuning_db_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_DB
{});
if
(
!
tuning_db_path
.
empty
())
if
(
not
tuning_db_path
.
empty
())
{
{
std
::
ifstream
tuning_db_tsv
(
tuning_db_path
);
std
::
ifstream
tuning_db_tsv
(
tuning_db_path
);
if
(
tuning_db_tsv
)
if
(
tuning_db_tsv
)
{
{
found_table
=
true
;
std
::
string
line
;
std
::
string
line
;
while
(
std
::
getline
(
tuning_db_tsv
,
line
))
while
(
std
::
getline
(
tuning_db_tsv
,
line
))
{
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
line
,
'\t'
);
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
line
,
'\t'
);
std
::
string
arch
=
tokens
[
0
];
std
::
string
arch
=
tokens
[
0
];
std
::
string
prob
=
tokens
[
1
];
std
::
string
num_cu
=
tokens
[
1
];
std
::
string
perf
=
tokens
[
2
];
std
::
string
prob
=
tokens
[
2
];
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
prob
);
std
::
string
perf
=
tokens
[
3
];
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
key
.
c_str
(),
perf
.
c_str
(),
1.0
);
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
num_cu
).
append
(
"
\t
"
).
append
(
prob
);
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
make_mlir_string_ref
(
key
),
make_mlir_string_ref
(
perf
),
1.0
);
}
}
}
}
}
}
else
else
{
{
found_table
=
false
;
std
::
cerr
std
::
cerr
<<
"WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
<<
"WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
"optimal performance."
"optimal performance."
<<
std
::
endl
;
<<
std
::
endl
;
}
}
return
tuning_table
;
return
std
::
make_pair
(
std
::
move
(
tuning_table
),
found_table
)
;
}
}
bool
get_module_tuned
()
const
bool
get_module_tuned
()
const
{
{
static
mlir_tuning_table
tuning_table
=
create
_tuning_table
();
static
std
::
pair
<
mlir_tuning_table
,
bool
>
tuning_table
=
load
_tuning_table
();
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
if
(
not
mlirRockTuningSetFromTable
(
tuning_table
.
first
.
get
(),
mmodule
.
get
()))
{
{
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
prob_config
;
std
::
stringstream
key
(
prob_config
);
size_t
prob_config_bytes
=
std
::
cerr
<<
"fails to set param on"
<<
prob_config
<<
std
::
endl
;
mlirRockTuningGetKey
(
mmodule
.
get
(),
prob_config
.
data
(),
prob_config
.
size
());
dump_tuning_cfg
(
prob_config
);
if
(
prob_config_bytes
>=
prob_config
.
size
())
{
std
::
cerr
<<
"MLIR tuning key overflowed buffer, needed "
<<
prob_config_bytes
<<
" bytes"
<<
std
::
endl
;
return
false
;
}
std
::
string
prob_config_str
(
prob_config
.
begin
(),
prob_config
.
begin
()
+
prob_config_bytes
);
if
(
tuning_table
.
second
)
{
std
::
cerr
<<
"NOTE: MLIR tuning table did not include a key for "
<<
prob_config_str
<<
std
::
endl
;
}
dump_tuning_cfg
(
prob_config_str
);
return
false
;
return
false
;
}
}
return
true
;
return
true
;
...
@@ -678,7 +801,8 @@ struct mlir_program
...
@@ -678,7 +801,8 @@ struct mlir_program
mlir_module
mmodule
;
mlir_module
mmodule
;
problem_params
pp
;
problem_params
pp
;
std
::
deque
<
std
::
string
>
strings
{};
std
::
deque
<
std
::
string
>
strings
{};
std
::
string
target_arch
;
std
::
string
target_arch
=
""
;
std
::
size_t
num_cu
=
0
;
std
::
string
sym_name
;
std
::
string
sym_name
;
};
};
...
@@ -690,14 +814,14 @@ std::string dump_mlir(const module& m)
...
@@ -690,14 +814,14 @@ std::string dump_mlir(const module& m)
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
}
}
void
adjust_param_shapes
(
module
&
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
void
adjust_param_shapes
(
module
&
m
,
const
std
::
vector
<
shape
>&
inputs
)
{
{
auto
names
=
m
.
get_parameter_names
();
auto
names
=
m
.
get_parameter_names
();
std
::
sort
(
names
.
begin
(),
names
.
end
());
std
::
sort
(
names
.
begin
(),
names
.
end
());
for
(
auto
i
:
range
(
names
.
size
()))
for
(
auto
i
:
range
(
names
.
size
()))
{
{
const
auto
&
name
=
names
[
i
];
const
auto
&
name
=
names
[
i
];
const
auto
&
input
=
inputs
[
i
]
->
get_shape
()
;
const
auto
&
input
=
inputs
[
i
];
auto
param
=
m
.
get_parameter
(
name
);
auto
param
=
m
.
get_parameter
(
name
);
if
(
input
.
standard
())
if
(
input
.
standard
())
continue
;
continue
;
...
@@ -735,24 +859,26 @@ void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs)
...
@@ -735,24 +859,26 @@ void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs)
}
}
}
}
code_object_op
compile_mlir
(
const
context
&
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
code_object_op
compile_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
,
const
value
&
solution
)
{
{
adjust_param_shapes
(
m
,
inputs
);
adjust_param_shapes
(
m
,
to_shapes
(
inputs
)
)
;
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
if
(
trace
)
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
std
::
cout
<<
m
<<
std
::
endl
;
// set mutex while llvm thread support is disabled.
static
std
::
mutex
g_mlirc_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_mlirc_mutex
);
mlir_program
mp
;
mlir_program
mp
;
mp
.
find_target
(
);
mp
.
set_gpu_properties
(
migraphx_ctx
);
mp
.
parse
(
m
);
mp
.
parse
(
m
);
auto
mod_op
=
mlirModuleGetOperation
(
mp
.
mmodule
.
get
());
auto
mod_op
=
mlirModuleGetOperation
(
mp
.
mmodule
.
get
());
if
(
trace
)
if
(
trace
)
std
::
cout
<<
mlir_print
(
&
mlirOperationPrint
,
mod_op
)
<<
std
::
endl
;
std
::
cout
<<
mlir_print
(
&
mlirOperationPrint
,
mod_op
)
<<
std
::
endl
;
auto
co
=
mp
.
compile
();
auto
co
=
mp
.
compile
(
solution
);
co
.
output
=
m
.
get_output_shapes
().
front
();
co
.
expected_inputs
=
to_shapes
(
inputs
);
co
.
output
=
m
.
get_output_shapes
().
front
();
return
co
;
return
co
;
}
}
...
@@ -772,6 +898,17 @@ instruction_ref insert_mlir(module& m,
...
@@ -772,6 +898,17 @@ instruction_ref insert_mlir(module& m,
return
m
.
insert_instruction
(
ins
,
co
,
refs
);
return
m
.
insert_instruction
(
ins
,
co
,
refs
);
}
}
tuning_config
get_tuning_config_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
shape
>&
inputs
)
{
adjust_param_shapes
(
m
,
inputs
);
mlir_program
mp
;
mp
.
set_gpu_properties
(
migraphx_ctx
);
mp
.
parse
(
m
);
return
mp
.
get_tuning_config
();
}
#else
#else
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
...
@@ -783,20 +920,27 @@ void use(T&)
...
@@ -783,20 +920,27 @@ void use(T&)
// Disabling clang-tidy warning on non-real useage.
// Disabling clang-tidy warning on non-real useage.
// NOLINTBEGIN(performance-unnecessary-value-param)
// NOLINTBEGIN(performance-unnecessary-value-param)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
,
const
value
&
)
{
{
return
{};
return
{};
}
}
// NOLINTEND(performance-unnecessary-value-param)
instruction_ref
instruction_ref
// cppcheck-suppress funcArgNamesDifferent
// cppcheck-suppress funcArgNamesDifferent
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
{
{
use
(
co
);
use
(
co
);
use
(
m
);
return
m
.
end
();
return
m
.
end
();
}
}
tuning_config
get_tuning_config_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
shape
>&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
#endif
#endif
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/rocblas.cpp
View file @
23cb7917
...
@@ -47,32 +47,24 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
...
@@ -47,32 +47,24 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
return
rb
;
return
rb
;
}
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
bool
get_compute_fp32_flag
()
bool
get_compute_fp32_flag
()
{
{
bool
compute_fp32
=
false
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
return
(
starts_with
(
device_name
,
"gfx9"
)
and
device_name
>=
"gfx908"
);
compute_fp32
=
true
;
#endif
return
compute_fp32
;
}
}
bool
get_int8_x4_format
(
context
&
ctx
)
bool
get_int8_x4_format
(
context
&
ctx
)
{
{
bool
int8_x4_format
=
true
;
#if ROCBLAS_VERSION_MAJOR >= 3
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
(
void
)(
ctx
);
return
false
;
#else
// int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
// v3.0 and will be removed in v4.0
rocblas_gemm_flags
flag
;
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
)
;
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
#endif
#endif
return
int8_x4_format
;
}
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/target.cpp
View file @
23cb7917
...
@@ -57,6 +57,7 @@
...
@@ -57,6 +57,7 @@
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/fuse_ck.hpp>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/prefuse_ops.hpp>
#include <migraphx/gpu/prefuse_ops.hpp>
...
@@ -72,9 +73,12 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -72,9 +73,12 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_REDUCE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_REDUCE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NHWC
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NHWC
)
#ifndef _WIN32
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_CK
)
#endif
struct
id_pass
struct
id_pass
{
{
std
::
string
name
()
const
{
return
"id"
;
}
std
::
string
name
()
const
{
return
"id"
;
}
...
@@ -98,16 +102,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -98,16 +102,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
unsupported_types
.
erase
(
shape
::
type_t
::
bool_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
bool_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
int8_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
int8_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
uint8_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
uint8_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
int32_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
tuple_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
tuple_type
);
// clang-format off
// clang-format off
return
return
{
{
enable_pass
(
options
.
split_single_dyn_dim
,
split_single_dyn_dim
{}
)
,
split_single_dyn_dim
{},
enable_pass
(
options
.
split_single_dyn_dim
,
dead_code_elimination
{}
)
,
dead_code_elimination
{},
normalize_ops
{},
normalize_ops
{},
dead_code_elimination
{},
dead_code_elimination
{},
simplify_qdq
{},
simplify_qdq
{},
rewrite_quantization
{},
enable_pass
(
not
mlir_enabled
(),
rewrite_quantization
{}
)
,
dead_code_elimination
{},
dead_code_elimination
{},
eliminate_data_type
{
unsupported_types
,
shape
::
type_t
::
float_type
},
eliminate_data_type
{
unsupported_types
,
shape
::
type_t
::
float_type
},
simplify_reshapes
{},
simplify_reshapes
{},
...
@@ -121,7 +126,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -121,7 +126,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
inline_module
{},
inline_module
{},
rewrite_pooling
{},
rewrite_pooling
{},
dead_code_elimination
{},
dead_code_elimination
{},
rewrite_gelu
{},
enable_pass
(
options
.
fast_math
,
rewrite_gelu
{}
)
,
optimize_module
{},
optimize_module
{},
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_NHWC
{}),
layout_nhwc
{}),
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_NHWC
{}),
layout_nhwc
{}),
dead_code_elimination
{},
dead_code_elimination
{},
...
@@ -129,11 +134,15 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -129,11 +134,15 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
auto_contiguous
{},
auto_contiguous
{},
optimize_module
{},
optimize_module
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}
)
,
fuse_pointwise
{},
dead_code_elimination
{},
dead_code_elimination
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_REDUCE_FUSION
{}),
fuse_reduce
{}),
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_REDUCE_FUSION
{}),
fuse_reduce
{}),
dead_code_elimination
{},
dead_code_elimination
{},
fuse_mlir
{
&
ctx
},
#ifndef _WIN32
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_CK
{}),
fuse_ck
{}),
#endif
dead_code_elimination
{},
enable_pass
(
mlir_enabled
(),
fuse_mlir
{
&
ctx
}),
dead_code_elimination
{},
dead_code_elimination
{},
lowering
{
&
ctx
,
options
.
offload_copy
},
lowering
{
&
ctx
,
options
.
offload_copy
},
eliminate_contiguous
{
"gpu::contiguous"
},
eliminate_contiguous
{
"gpu::contiguous"
},
...
@@ -150,7 +159,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -150,7 +159,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
adjust_allocation
{
gpu_allocation_model
{}},
adjust_allocation
{
gpu_allocation_model
{}},
dead_code_elimination
{},
dead_code_elimination
{},
compile_ops
{
&
ctx
},
compile_ops
{
&
ctx
,
options
.
exhaustive_tune
},
dead_code_elimination
{},
dead_code_elimination
{},
promote_literals
{},
promote_literals
{},
dead_code_elimination
{},
dead_code_elimination
{},
...
...
src/targets/gpu/
driver/perf
.cpp
→
src/targets/gpu/
time_op
.cpp
View file @
23cb7917
...
@@ -21,7 +21,7 @@
...
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/gpu/
driver/perf
.hpp>
#include <migraphx/gpu/
time_op
.hpp>
#include <migraphx/context.hpp>
#include <migraphx/context.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/time.hpp>
#include <migraphx/time.hpp>
...
@@ -30,12 +30,11 @@
...
@@ -30,12 +30,11 @@
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
driver
{
std
::
vector
<
argument
>
generate_arguments
(
const
std
::
vector
<
shape
>&
shapes
,
unsigned
long
seed
=
0
)
std
::
vector
<
argument
>
generate_arguments
(
const
std
::
vector
<
shape
>&
shapes
,
unsigned
long
seed
=
0
)
{
{
std
::
vector
<
argument
>
args
;
std
::
vector
<
argument
>
args
;
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
std
::
back_inserter
(
args
),
[
&
](
auto
&
s
)
{
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
std
::
back_inserter
(
args
),
[
&
](
const
auto
&
s
)
{
return
to_gpu
(
generate_argument
(
s
,
seed
++
));
return
to_gpu
(
generate_argument
(
s
,
seed
++
));
});
});
return
args
;
return
args
;
...
@@ -69,7 +68,6 @@ time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
...
@@ -69,7 +68,6 @@ time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
return
std
::
make_pair
(
host_time
/
n
,
device_time
/
n
);
return
std
::
make_pair
(
host_time
/
n
,
device_time
/
n
);
}
}
}
// namespace driver
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/ref/CMakeLists.txt
View file @
23cb7917
...
@@ -37,6 +37,8 @@ target_link_libraries(migraphx_ref PUBLIC migraphx)
...
@@ -37,6 +37,8 @@ target_link_libraries(migraphx_ref PUBLIC migraphx)
target_include_directories
(
migraphx_ref PRIVATE
${
BLAZE_INCLUDE
}
)
target_include_directories
(
migraphx_ref PRIVATE
${
BLAZE_INCLUDE
}
)
target_compile_definitions
(
migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS
)
target_compile_definitions
(
migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS
)
migraphx_generate_export_header
(
migraphx_ref
)
rocm_install_targets
(
rocm_install_targets
(
TARGETS migraphx_ref
TARGETS migraphx_ref
INCLUDE
INCLUDE
...
...
src/targets/ref/include/migraphx/ref/context.hpp
View file @
23cb7917
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/ref/export.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/ref/include/migraphx/ref/lowering.hpp
View file @
23cb7917
...
@@ -24,14 +24,14 @@
...
@@ -24,14 +24,14 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <migraphx/ref/context.hpp>
#include <migraphx/program.hpp>
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
ref
{
namespace
ref
{
struct
lowering
struct
MIGRAPHX_REF_EXPORT
lowering
{
{
std
::
string
name
()
const
{
return
"ref::lowering"
;
}
std
::
string
name
()
const
{
return
"ref::lowering"
;
}
void
apply
(
module
&
m
)
const
;
void
apply
(
module
&
m
)
const
;
...
...
src/targets/ref/include/migraphx/ref/target.hpp
View file @
23cb7917
...
@@ -35,7 +35,7 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -35,7 +35,7 @@ inline namespace MIGRAPHX_INLINE_NS {
struct
pass
;
struct
pass
;
namespace
ref
{
namespace
ref
{
struct
target
struct
MIGRAPHX_REF_EXPORT
target
{
{
std
::
string
name
()
const
;
std
::
string
name
()
const
;
std
::
vector
<
pass
>
get_passes
(
migraphx
::
context
&
ctx
,
const
compile_options
&
)
const
;
std
::
vector
<
pass
>
get_passes
(
migraphx
::
context
&
ctx
,
const
compile_options
&
)
const
;
...
...
src/targets/ref/lowering.cpp
View file @
23cb7917
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/quant_dot.hpp>
...
...
src/tf/CMakeLists.txt
View file @
23cb7917
...
@@ -42,8 +42,9 @@ target_compile_options(tf-proto PRIVATE -w)
...
@@ -42,8 +42,9 @@ target_compile_options(tf-proto PRIVATE -w)
target_link_libraries
(
tf-proto PRIVATE
${
PROTOBUF_LIBRARY
}
)
target_link_libraries
(
tf-proto PRIVATE
${
PROTOBUF_LIBRARY
}
)
set_target_properties
(
tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On
)
set_target_properties
(
tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On
)
file
(
GLOB TF_SRCS
${
CONFIGURE_DEPENDS
}
*.cpp
)
file
(
GLOB TF_SRCS CONFIGURE_DEPENDS *.cpp
)
add_library
(
migraphx_tf
${
TF_SRCS
}
)
add_library
(
migraphx_tf
${
TF_SRCS
}
)
migraphx_generate_export_header
(
migraphx_tf
)
target_include_directories
(
migraphx_tf PRIVATE include
)
target_include_directories
(
migraphx_tf PRIVATE include
)
set_target_properties
(
migraphx_tf PROPERTIES EXPORT_NAME tf
)
set_target_properties
(
migraphx_tf PROPERTIES EXPORT_NAME tf
)
rocm_set_soversion
(
migraphx_tf
${
MIGRAPHX_SO_VERSION
}
)
rocm_set_soversion
(
migraphx_tf
${
MIGRAPHX_SO_VERSION
}
)
...
...
src/tf/op_parser.cpp
View file @
23cb7917
...
@@ -46,6 +46,7 @@ std::vector<std::string> get_op_parsers()
...
@@ -46,6 +46,7 @@ std::vector<std::string> get_op_parsers()
op_parser_map
().
end
(),
op_parser_map
().
end
(),
std
::
back_inserter
(
result
),
std
::
back_inserter
(
result
),
[
&
](
auto
&&
p
)
{
return
p
.
first
;
});
[
&
](
auto
&&
p
)
{
return
p
.
first
;
});
std
::
sort
(
result
.
begin
(),
result
.
end
());
return
result
;
return
result
;
}
}
...
...
src/tf/parse_batchnorm.cpp
View file @
23cb7917
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
// unsqueeze tensors of shape (C) to broadcast correctly
// unsqueeze tensors of shape (C) to broadcast correctly
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
scale_unsqueeze
=
auto
scale_unsqueeze
=
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
var_unsqueeze
=
auto
var_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
auto
numer
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
x_sub_mean
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
denom
=
info
.
add_
broadcastable_binary_op
(
"pow"
,
var_eps
,
rt
);
auto
rsqrt
=
info
.
add_
instruction
(
make_op
(
"rsqrt"
)
,
var_eps
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"
div"
,
numer
,
denom
);
auto
mul0
=
info
.
add_broadcastable_binary_op
(
"
mul"
,
scale_unsqueeze
,
rsqrt
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
scale_unsqueeze
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
x_sub_mean
,
mul0
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
}
}
};
};
...
...
src/tf/tf.cpp
View file @
23cb7917
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/tf/tf_parser.hpp>
#include <migraphx/tf/tf_parser.hpp>
#include <migraphx/tf/op_parser.hpp>
#include <iostream>
#include <iostream>
#include <fstream>
#include <fstream>
#include <unordered_map>
#include <unordered_map>
...
@@ -62,5 +63,7 @@ program parse_tf(const std::string& name, const tf_options& options)
...
@@ -62,5 +63,7 @@ program parse_tf(const std::string& name, const tf_options& options)
return
std
::
move
(
parser
.
prog
);
return
std
::
move
(
parser
.
prog
);
}
}
std
::
vector
<
std
::
string
>
get_tf_operators
()
{
return
tf
::
get_op_parsers
();
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/tf/tf_parser.cpp
View file @
23cb7917
...
@@ -338,7 +338,7 @@ void tf_parser::parse_node(const std::string& name)
...
@@ -338,7 +338,7 @@ void tf_parser::parse_node(const std::string& name)
std
::
string
input_name
=
input
;
std
::
string
input_name
=
input
;
// if input has trailing `:0` index then remove it
// if input has trailing `:0` index then remove it
auto
multi_out_idx
=
input
.
find
(
':'
);
auto
multi_out_idx
=
input
.
find
(
':'
);
if
(
multi_out_idx
!=
std
::
string
::
npos
&&
input
.
substr
(
multi_out_idx
+
1
)
==
"0"
)
if
(
multi_out_idx
!=
std
::
string
::
npos
and
input
.
substr
(
multi_out_idx
+
1
)
==
"0"
)
{
{
input_name
=
input
.
substr
(
0
,
multi_out_idx
);
input_name
=
input
.
substr
(
0
,
multi_out_idx
);
}
}
...
...
src/value.cpp
View file @
23cb7917
...
@@ -28,6 +28,7 @@
...
@@ -28,6 +28,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp>
#include <migraphx/value.hpp>
#include <migraphx/optional.hpp>
#include <migraphx/optional.hpp>
#include <migraphx/hash.hpp>
#include <unordered_map>
#include <unordered_map>
#include <utility>
#include <utility>
...
@@ -284,7 +285,7 @@ bool value::contains(const std::string& pkey) const
...
@@ -284,7 +285,7 @@ bool value::contains(const std::string& pkey) const
}
}
std
::
size_t
value
::
size
()
const
std
::
size_t
value
::
size
()
const
{
{
auto
*
a
=
if_array_impl
(
x
);
const
auto
*
a
=
if_array_impl
(
x
);
if
(
a
==
nullptr
)
if
(
a
==
nullptr
)
return
0
;
return
0
;
return
a
->
size
();
return
a
->
size
();
...
@@ -519,6 +520,38 @@ std::ostream& operator<<(std::ostream& os, const value& d)
...
@@ -519,6 +520,38 @@ std::ostream& operator<<(std::ostream& os, const value& d)
return
os
;
return
os
;
}
}
template
<
class
T
>
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
const
T
&
x
)
{
std
::
size_t
h
=
hash_value
(
key
);
hash_combine
(
h
,
x
);
return
h
;
}
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
std
::
nullptr_t
)
{
return
hash_value
(
key
);
}
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
const
std
::
vector
<
value
>&
x
)
{
std
::
size_t
h
=
hash_value
(
key
);
for
(
const
auto
&
v
:
x
)
hash_combine
(
h
,
v
);
return
h
;
}
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
const
value
::
binary
&
x
)
{
std
::
size_t
h
=
hash_value
(
key
);
for
(
const
auto
&
v
:
x
)
hash_combine
(
h
,
v
);
return
h
;
}
std
::
size_t
value
::
hash
()
const
{
std
::
size_t
h
=
0
;
this
->
visit_value
([
&
](
const
auto
&
a
)
{
h
=
value_hash
(
this
->
get_key
(),
a
);
});
return
h
;
}
void
value
::
debug_print
(
bool
show_type
)
const
void
value
::
debug_print
(
bool
show_type
)
const
{
{
if
(
show_type
)
if
(
show_type
)
...
...
src/verify_args.cpp
View file @
23cb7917
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
bool
passed
=
true
;
bool
passed
=
true
;
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
double
error
;
double
error
;
passed
=
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
passed
=
verify
::
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
if
(
not
passed
)
if
(
not
passed
)
{
{
// TODO: Check for nans
// TODO: Check for nans
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
if
(
target
.
size
()
<
32
)
if
(
target
.
size
()
<
32
)
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
auto
mxdiff
=
max_diff
(
ref
,
target
);
auto
mxdiff
=
verify
::
max_diff
(
ref
,
target
);
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
auto
idx
=
mismatch_idx
(
ref
,
target
,
float_equal
);
auto
idx
=
verify
::
mismatch_idx
(
ref
,
target
,
float_equal
);
if
(
idx
<
range_distance
(
ref
))
if
(
idx
<
verify
::
range_distance
(
ref
))
{
{
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
<<
std
::
endl
;
<<
std
::
endl
;
}
}
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
}
}
else
else
{
{
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
// auto mxdiff = max_diff(ref, target);
// auto mxdiff = max_diff(ref, target);
// std::cout << "Max diff: " << mxdiff << std::endl;
// std::cout << "Max diff: " << mxdiff << std::endl;
// auto idx = mismatch_idx(ref, target, float_equal);
// auto idx = mismatch_idx(ref, target, float_equal);
// if(idx < range_distance(ref))
// if(idx <
verify::
range_distance(ref))
// {
// {
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// << std::endl;
// << std::endl;
// }
// }
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
...
test/CMakeLists.txt
View file @
23cb7917
...
@@ -24,8 +24,6 @@
...
@@ -24,8 +24,6 @@
cmake_policy
(
SET CMP0057 NEW
)
cmake_policy
(
SET CMP0057 NEW
)
include
(
CTest
)
find_package
(
Threads REQUIRED
)
find_package
(
Threads REQUIRED
)
include
(
ProcessorCount
)
include
(
ProcessorCount
)
ProcessorCount
(
N
)
ProcessorCount
(
N
)
...
@@ -100,21 +98,15 @@ endfunction()
...
@@ -100,21 +98,15 @@ endfunction()
function
(
add_test_executable TEST_NAME
)
function
(
add_test_executable TEST_NAME
)
add_executable
(
${
TEST_NAME
}
EXCLUDE_FROM_ALL
${
ARGN
}
)
add_executable
(
${
TEST_NAME
}
EXCLUDE_FROM_ALL
${
ARGN
}
)
target_link_libraries
(
${
TEST_NAME
}
${
CMAKE_THREAD_LIBS_INIT
}
)
# Cmake does not add flags correctly for gcc
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"GNU"
)
set_target_properties
(
${
TEST_NAME
}
PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread
)
endif
()
set
(
TEST_COMMAND
${
TEST_NAME
}
)
set
(
TEST_COMMAND
${
TEST_NAME
}
)
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_ref
)
target_link_libraries
(
${
TEST_NAME
}
Threads::Threads
migraphx migraphx_onnx migraphx_ref
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
endfunction
(
add_test_executable
)
endfunction
(
add_test_executable
)
file
(
GLOB TESTS
${
CONFIGURE_DEPENDS
}
*.cpp
)
file
(
GLOB TESTS CONFIGURE_DEPENDS *.cpp
)
foreach
(
TEST
${
TESTS
}
)
foreach
(
TEST
${
TESTS
}
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
...
@@ -124,7 +116,7 @@ endforeach()
...
@@ -124,7 +116,7 @@ endforeach()
if
(
MIGRAPHX_ENABLE_GPU
)
if
(
MIGRAPHX_ENABLE_GPU
)
# gpu tests
# gpu tests
file
(
GLOB GPU_TESTS
${
CONFIGURE_DEPENDS
}
gpu/*.cpp
)
file
(
GLOB GPU_TESTS CONFIGURE_DEPENDS gpu/*.cpp
)
foreach
(
TEST
${
GPU_TESTS
}
)
foreach
(
TEST
${
GPU_TESTS
}
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
...
@@ -134,13 +126,16 @@ if(MIGRAPHX_ENABLE_GPU)
...
@@ -134,13 +126,16 @@ if(MIGRAPHX_ENABLE_GPU)
COST 10
COST 10
RESOURCE_LOCK gpu
RESOURCE_LOCK gpu
)
)
if
(
MIGRAPHX_USE_HIPRTC
)
target_compile_definitions
(
test_gpu_
${
BASE_NAME
}
PUBLIC -DMIGRAPHX_USE_HIPRTC
)
endif
()
target_link_libraries
(
test_gpu_
${
BASE_NAME
}
migraphx_gpu migraphx_kernels
)
target_link_libraries
(
test_gpu_
${
BASE_NAME
}
migraphx_gpu migraphx_kernels
)
endforeach
()
endforeach
()
endif
()
endif
()
if
(
MIGRAPHX_ENABLE_FPGA
)
if
(
MIGRAPHX_ENABLE_FPGA
)
# fpga tests
# fpga tests
file
(
GLOB FPGA_TESTS
${
CONFIGURE_DEPENDS
}
fpga/*.cpp
)
file
(
GLOB FPGA_TESTS CONFIGURE_DEPENDS fpga/*.cpp
)
foreach
(
TEST
${
FPGA_TESTS
}
)
foreach
(
TEST
${
FPGA_TESTS
}
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
...
@@ -187,12 +182,36 @@ if(MIGRAPHX_ENABLE_PYTHON)
...
@@ -187,12 +182,36 @@ if(MIGRAPHX_ENABLE_PYTHON)
add_subdirectory
(
py
)
add_subdirectory
(
py
)
endif
()
endif
()
# multitarget test
if
(
MIGRAPHX_ENABLE_GPU AND MIGRAPHX_ENABLE_CPU AND MIGRAPHX_ENABLE_FPGA
)
set
(
TEST_MULTI_TARGET_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
/multi_target
)
file
(
GLOB MULTI_TARGET_TESTS CONFIGURE_DEPENDS
${
TEST_MULTI_TARGET_DIR
}
/*.cpp
)
foreach
(
MULTI_TARGET_TEST
${
MULTI_TARGET_TESTS
}
)
get_filename_component
(
BASE_NAME
${
MULTI_TARGET_TEST
}
NAME_WE
)
set
(
TEST_NAME test_
${
BASE_NAME
}
)
add_executable
(
${
TEST_NAME
}
${
MULTI_TARGET_TEST
}
)
rocm_clang_tidy_check
(
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_tf migraphx_all_targets
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
> WORKING_DIRECTORY
${
TEST_MULTI_TARGET_DIR
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
endforeach
()
endif
()
function
(
test_header NAME HEADER
)
function
(
test_header NAME HEADER
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
"
"#include <
${
HEADER
}
>
\n
int main() {}
\n
"
#include <
${
HEADER
}
>
int main() {}
\n
"
)
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
"
"#include <
${
HEADER
}
>
\n
"
#include <
${
HEADER
}
>
#if defined(min) || defined(max) || defined(near) || defined(far)
#error
\"
Do not include windows.h in header files
\"
#endif
\n
"
)
)
add_test_executable
(
${
NAME
}
add_test_executable
(
${
NAME
}
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
...
@@ -201,14 +220,14 @@ function(test_header NAME HEADER)
...
@@ -201,14 +220,14 @@ function(test_header NAME HEADER)
endfunction
()
endfunction
()
function
(
test_headers PREFIX
)
function
(
test_headers PREFIX
)
file
(
GLOB HEADERS
${
CONFIGURE_DEPENDS
}
${
ARGN
}
)
file
(
GLOB HEADERS CONFIGURE_DEPENDS
${
ARGN
}
)
foreach
(
HEADER
${
HEADERS
}
)
foreach
(
HEADER
${
HEADERS
}
)
file
(
RELATIVE_PATH HEADER_REL
${
CMAKE_SOURCE_DIR
}
${
HEADER
}
)
file
(
RELATIVE_PATH HEADER_REL
${
CMAKE_SOURCE_DIR
}
${
HEADER
}
)
string
(
MAKE_C_IDENTIFIER
${
HEADER_REL
}
TEST_NAME
)
string
(
MAKE_C_IDENTIFIER
${
HEADER_REL
}
TEST_NAME
)
get_filename_component
(
BASE_NAME
${
HEADER
}
NAME_WE
)
get_filename_component
(
BASE_NAME
${
HEADER
}
NAME_WE
)
test_header
(
header_
${
TEST_NAME
}
${
PREFIX
}
/
${
BASE_NAME
}
.hpp
)
test_header
(
header_
${
TEST_NAME
}
${
PREFIX
}
/
${
BASE_NAME
}
.hpp
)
target_link_libraries
(
header_
${
TEST_NAME
}
migraphx_all_targets
)
target_link_libraries
(
header_
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_tf
migraphx_all_targets
)
endforeach
()
endforeach
()
endfunction
()
endfunction
()
...
@@ -225,3 +244,4 @@ if(MIGRAPHX_ENABLE_FPGA)
...
@@ -225,3 +244,4 @@ if(MIGRAPHX_ENABLE_FPGA)
test_headers
(
migraphx/fpga
${
CMAKE_SOURCE_DIR
}
/src/targets/fpga/include/migraphx/fpga/*.hpp
)
test_headers
(
migraphx/fpga
${
CMAKE_SOURCE_DIR
}
/src/targets/fpga/include/migraphx/fpga/*.hpp
)
endif
()
endif
()
Prev
1
…
13
14
15
16
17
18
19
20
21
…
23
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment