Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
23cb7917
Unverified
Commit
23cb7917
authored
Aug 16, 2023
by
Brian Pickrell
Committed by
GitHub
Aug 16, 2023
Browse files
Merge branch 'develop' into blas_tuning
parents
b5fcc0bc
ea32ca70
Changes
458
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
377 additions
and
160 deletions
+377
-160
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
...gets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
+11
-3
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
+1
-1
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+18
-12
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+216
-72
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+8
-16
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+17
-8
src/targets/gpu/time_op.cpp
src/targets/gpu/time_op.cpp
+2
-4
src/targets/ref/CMakeLists.txt
src/targets/ref/CMakeLists.txt
+2
-0
src/targets/ref/include/migraphx/ref/context.hpp
src/targets/ref/include/migraphx/ref/context.hpp
+1
-0
src/targets/ref/include/migraphx/ref/lowering.hpp
src/targets/ref/include/migraphx/ref/lowering.hpp
+2
-2
src/targets/ref/include/migraphx/ref/target.hpp
src/targets/ref/include/migraphx/ref/target.hpp
+1
-1
src/targets/ref/lowering.cpp
src/targets/ref/lowering.cpp
+1
-1
src/tf/CMakeLists.txt
src/tf/CMakeLists.txt
+2
-1
src/tf/op_parser.cpp
src/tf/op_parser.cpp
+1
-0
src/tf/parse_batchnorm.cpp
src/tf/parse_batchnorm.cpp
+5
-6
src/tf/tf.cpp
src/tf/tf.cpp
+3
-0
src/tf/tf_parser.cpp
src/tf/tf_parser.cpp
+1
-1
src/value.cpp
src/value.cpp
+34
-1
src/verify_args.cpp
src/verify_args.cpp
+13
-13
test/CMakeLists.txt
test/CMakeLists.txt
+38
-18
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
View file @
23cb7917
...
...
@@ -218,7 +218,15 @@ using common_type_t = typename common_type<Ts...>::type;
#define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>
constexpr
unsigned
long
int_max
(
unsigned
long
n
)
{
return
(
1u
<<
(
n
*
8
))
-
1
;
}
constexpr
unsigned
long
int_max
(
unsigned
long
n
)
{
// Note, left shift cannot be used to get the maximum value of int64_type or
// uint64_type because it is undefined behavior to left shift 64 bits for
// these types
if
(
n
==
sizeof
(
int64_t
))
return
-
1
;
return
(
1ul
<<
(
n
*
8
))
-
1
;
}
template
<
class
T
,
MIGRAPHX_REQUIRES
(
is_integral
<
T
>{}
or
is_floating_point
<
T
>
{}
or
...
...
@@ -228,9 +236,9 @@ constexpr T numeric_max()
if
constexpr
(
is_integral
<
T
>
{})
{
if
constexpr
(
is_unsigned
<
T
>
{})
return
int_max
(
sizeof
(
T
))
*
2
;
else
return
int_max
(
sizeof
(
T
));
else
return
int_max
(
sizeof
(
T
))
/
2
;
}
else
if
constexpr
(
is_same
<
T
,
double
>
{})
return
__DBL_MAX__
;
...
...
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
View file @
23cb7917
...
...
@@ -135,7 +135,7 @@ constexpr vec<vec_type<T>, N> vec_packed_at(T x, I i)
return
vec
<
T
,
N
>
{
x
};
else
{
MIGRAPHX_ASSERT
((
i
+
N
)
<
vec_size
<
T
>
());
MIGRAPHX_ASSERT
((
i
+
N
)
<
=
vec_size
<
T
>
());
vec
<
vec_type
<
T
>
,
N
>
result
=
{
0
};
for
(
int
j
=
0
;
j
<
N
;
j
++
)
{
...
...
src/targets/gpu/lowering.cpp
View file @
23cb7917
...
...
@@ -22,12 +22,19 @@
* THE SOFTWARE.
*/
#include <iterator>
#include <migraphx/gpu/lowering.hpp>
#include <utility>
#include <functional>
#include <algorithm>
#include <map>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/program.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/if_op.hpp>
...
...
@@ -35,17 +42,12 @@
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/program.hpp>
#include <utility>
#include <functional>
#include <algorithm>
#include <map>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -54,6 +56,7 @@ namespace gpu {
struct
miopen_apply
{
module
*
mod
=
nullptr
;
module_pass_manager
*
mpm
=
nullptr
;
const
lowering
*
pass
=
nullptr
;
std
::
unordered_map
<
std
::
string
,
std
::
function
<
instruction_ref
(
instruction_ref
)
>>
apply_map
{};
instruction_ref
last
{};
...
...
@@ -83,7 +86,7 @@ struct miopen_apply
auto
&
ctx
=
get_context
();
int8_x4_format
=
get_int8_x4_format
(
ctx
);
compute_fp32
=
get_compute_fp32_flag
();
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
offload_copy
=
(
mod
==
mpm
->
get_root_module
()
)
?
pass
->
offload_copy
:
false
;
add_generic_op
(
"contiguous"
);
...
...
@@ -103,7 +106,7 @@ struct miopen_apply
add_extend_op
(
"topk"
);
add_convolution_op
(
"convolution"
);
add_convolution_op
(
"
de
convolution"
);
add_convolution_op
(
"convolution
_backwards
"
);
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
...
...
@@ -375,7 +378,10 @@ struct miopen_apply
}
};
void
lowering
::
apply
(
module
&
m
)
const
{
miopen_apply
{
&
m
,
this
}.
apply
();
}
void
lowering
::
apply
(
module_pass_manager
&
mpm
)
const
{
miopen_apply
{
&
mpm
.
get_module
(),
&
mpm
,
this
}.
apply
();
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/mlir.cpp
View file @
23cb7917
...
...
@@ -36,7 +36,10 @@
#include <mutex>
#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
#warning "Incompatible version of rocMLIR library used, disabling"
// Only undefine when not using cppcheck
#ifndef CPPCHECK
#undef MIGRAPHX_MLIR
#endif
#else
#include <mlir-c/RegisterRocMLIR.h>
#endif
...
...
@@ -50,8 +53,10 @@
#include <migraphx/ranges.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/gpu/tuning_config.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/permutation.hpp>
#include <deque>
...
...
@@ -122,6 +127,9 @@ struct mlir_handle
#define MIGRAPHX_MANAGE_MLIR_HANDLE(T, F) migraphx::gpu::mlir_handle<T, decltype(&F), &F> // NOLINT
using
mlir_context
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirContext
,
mlirContextDestroy
);
using
mlir_thread_pool
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirLlvmThreadPool
,
mlirLlvmThreadPoolDestroy
);
using
mlir_dialect_registry
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirDialectRegistry
,
mlirDialectRegistryDestroy
);
using
mlir_module
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirModule
,
mlirModuleDestroy
);
using
mlir_operation
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirOperation
,
mlirOperationDestroy
);
using
mlir_op_printing_flags
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirOpPrintingFlags
,
...
...
@@ -131,6 +139,10 @@ using mlir_block = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirBlock, mlirBlockD
using
mlir_pass_manager
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirPassManager
,
mlirPassManagerDestroy
);
using
mlir_tuning_table
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningTable
,
mlirRockTuningTableDestroy
);
using
mlir_tuning_space
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningSpace
,
mlirRockTuningSpaceDestroy
);
using
mlir_tuning_param
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningParam
,
mlirRockTuningParamDestroy
);
std
::
string_view
to_string_view
(
MlirStringRef
s
)
{
return
{
s
.
data
,
s
.
length
};
}
...
...
@@ -164,25 +176,41 @@ std::string mlir_print(F f, T x)
return
ss
.
str
();
}
const
std
::
unordered_set
<
std
::
string
>&
get_xdlops_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
struct
mlir_program
{
mlir_program
()
:
ctx
(
mlirContextCreate
()),
:
ctx
(
mlirContextCreateWithRegistry
(
get_dialect_registry
().
get
(),
/*threadingEnable=*/
false
)),
location
(
mlirLocationUnknownGet
(
ctx
.
get
())),
mmodule
(
mlirModuleCreateEmpty
(
location
))
{
MlirDialectRegistry
registry
=
mlirDialectRegistryCreate
();
mlirRegisterRocMLIRDialects
(
registry
);
mlirContextAppendDialectRegistry
(
ctx
.
get
(),
registry
);
mlirContextSetThreadPool
(
ctx
.
get
(),
get_thread_pool
().
get
());
mlirContextLoadAllAvailableDialects
(
ctx
.
get
());
mlirDialectRegistryDestroy
(
registry
);
mlirContextSetAllowUnregisteredDialects
(
ctx
.
get
(),
true
/*allow*/
);
}
static
mlir_dialect_registry
&
get_dialect_registry
()
{
static
std
::
once_flag
init_guard
;
static
mlir_dialect_registry
the_registry
;
// The MLIR registration functions (for dialects and passes) are not
// necessarily thread-safe and need to be executed exactly once
// (especially since they eventually call non-thread-safe LLVM
// initilizations).
std
::
call_once
(
init_guard
,
[
&
]()
{
the_registry
=
mlirDialectRegistryCreate
();
mlirRegisterRocMLIRDialects
(
the_registry
.
get
());
mlirRegisterRocMLIRPasses
();
});
return
the_registry
;
}
static
mlir_thread_pool
&
get_thread_pool
()
{
// To save on overhead, we create one LLVM thread pool and reuse it
// across all MLIR contexts as recommended by MLIR upstream.
// Note that this is thread-safe as of C++11.
static
mlir_thread_pool
the_pool
=
mlirLlvmThreadPoolCreate
();
return
the_pool
;
}
MlirType
make_type
(
shape
::
type_t
t
)
const
...
...
@@ -244,8 +272,6 @@ struct mlir_program
MlirAttribute
attribute
(
std
::
int64_t
i
)
const
{
if
(
i
<
0
)
MIGRAPHX_THROW
(
"MLIR cant handle negative values since they are ambiguous"
);
return
mlirIntegerAttrGet
(
mlirIntegerTypeGet
(
ctx
.
get
(),
64
),
i
);
}
MlirAttribute
attribute
(
std
::
uint64_t
i
)
const
...
...
@@ -324,7 +350,8 @@ struct mlir_program
std
::
string
,
value
,
std
::
vector
<
value
>
,
MlirType
>
;
MlirType
,
MlirAttribute
>
;
using
named_attribute_t
=
std
::
pair
<
std
::
string_view
,
attribute_t
>
;
MlirNamedAttribute
name_attribute
(
const
named_attribute_t
&
na
)
const
...
...
@@ -365,14 +392,20 @@ struct mlir_program
mlir_operation_state
&
add_attributes
(
const
std
::
vector
<
named_attribute_t
>&
named_attrs
)
{
auto
attributes
=
prog
->
name_attributes
(
named_attrs
);
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
}
mlir_operation_state
&
add_attribute_value
(
const
value
&
v
)
{
auto
attributes
=
prog
->
name_attributes
(
v
);
if
(
not
attributes
.
empty
())
{
mlirOperationStateAddAttributes
(
&
op_state
,
attributes
.
size
(),
attributes
.
data
());
}
return
*
this
;
}
...
...
@@ -395,13 +428,19 @@ struct mlir_program
return
shape
{
r
.
type
(),
r
.
lens
()};
});
auto
x
=
prog
->
make_tensors
(
reshaped
);
if
(
not
x
.
empty
())
{
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
}
return
*
this
;
}
mlir_operation_state
&
add_operands
(
const
std
::
vector
<
MlirValue
>&
inputs
)
{
if
(
not
inputs
.
empty
())
{
mlirOperationStateAddOperands
(
&
op_state
,
inputs
.
size
(),
inputs
.
data
());
}
return
*
this
;
}
...
...
@@ -411,7 +450,10 @@ struct mlir_program
std
::
transform
(
regions
.
begin
(),
regions
.
end
(),
mregions
.
begin
(),
[](
const
auto
&
r
)
{
return
r
.
get
();
});
if
(
not
mregions
.
empty
())
{
mlirOperationStateAddOwnedRegions
(
&
op_state
,
mregions
.
size
(),
mregions
.
data
());
}
mlir_operation
op
(
mlirOperationCreate
(
&
op_state
));
// Release memory since mlir_operation owns it
for
(
auto
&
r
:
regions
)
...
...
@@ -468,7 +510,8 @@ struct mlir_program
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
{
"sym_name"
,
sym_name
},
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"arch"
,
target_arch
}});
{
"arch"
,
target_arch
},
{
"num_cu"
,
num_cu
}});
ops
.
add_region
(
std
::
move
(
region
));
insert
(
body
,
std
::
move
(
ops
));
...
...
@@ -481,6 +524,10 @@ struct mlir_program
{
if
(
ins
->
name
()
==
"@return"
)
return
"func.return"
;
if
(
ins
->
name
()
==
"@literal"
)
{
return
"tosa.const"
;
}
return
"migraphx."
+
ins
->
name
();
}
...
...
@@ -511,14 +558,7 @@ struct mlir_program
static
std
::
string
get_symbol_name
(
const
module
&
m
)
{
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
return
"mlir_"
+
ins
->
name
();
}
}
return
"main"
;
return
"mlir_"
+
gen
::
generate_name_from_ops
(
m
);
}
void
parse
(
const
module
&
m
)
...
...
@@ -532,20 +572,28 @@ struct mlir_program
{
if
(
ins
->
name
()
==
"@param"
)
continue
;
if
(
ins
->
name
()
==
"contiguous"
)
{
ins_map
[
ins
]
=
ins_map
[
ins
->
inputs
().
at
(
0
)];
continue
;
}
auto
name
=
get_name
(
ins
);
auto
ops
=
create_operation_state
(
name
);
ops
.
add_attribute_value
(
get_operator_value
(
ins
->
get_operator
()));
if
(
ins
->
name
()
!=
"@return"
)
ops
.
add_results
({
get_shape
(
ins
)});
if
(
ins
->
name
()
==
"@literal"
)
{
literal
r
=
ins
->
get_literal
();
MlirType
tensor_type
=
make_tensor
(
ins
->
get_shape
());
MlirAttribute
mlir_value_attr
=
mlirDenseElementsAttrRawBufferGet
(
tensor_type
,
r
.
get_shape
().
bytes
(),
r
.
data
());
ops
.
add_attributes
({{
"value"
,
mlir_value_attr
}});
}
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
// check if HW supports xdlops
auto
target_chip
=
trim
(
split_string
(
target_arch
,
':'
).
front
());
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_chip
);
if
(
xdlops
)
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
std
::
vector
<
MlirValue
>
inputs
;
...
...
@@ -562,18 +610,30 @@ struct mlir_program
}
}
code_object_op
compil
e
()
MIGRAPHX_TIDY_CONST
void
run_high_level_pipelin
e
()
MIGRAPHX_TIDY_CONST
{
mlir_pass_manager
pm_front
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirPassManagerRun
(
pm_front
.
get
(),
mmodule
.
get
());
mlirPassManagerRunOnOp
(
pm_front
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
()));
}
// 2nd pipeline to call
get_module_tuned
();
void
run_backend_pipeline
()
MIGRAPHX_TIDY_CONST
{
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirPassManagerRun
(
pm_back
.
get
(),
mmodule
.
get
());
mlirPassManagerRunOnOp
(
pm_back
.
get
(),
mlirModuleGetOperation
(
mmodule
.
get
()));
}
code_object_op
compile
(
const
value
&
solution
)
MIGRAPHX_TIDY_CONST
{
// 1st pipeline to call
run_high_level_pipeline
();
if
(
solution
.
is_null
())
get_module_tuned
();
else
set_tuning
(
solution
);
// 2nd pipeline to call
run_backend_pipeline
();
code_object_op
op
{};
op
.
symbol_name
=
sym_name
;
...
...
@@ -582,7 +642,12 @@ struct mlir_program
return
op
;
}
void
find_target
()
{
target_arch
=
get_device_name
();
}
void
set_gpu_properties
(
const
context
&
migraphx_ctx
)
{
const
auto
&
device
=
migraphx_ctx
.
get_current_device
();
target_arch
=
device
.
get_device_name
();
num_cu
=
device
.
get_cu_count
();
}
std
::
pair
<
std
::
size_t
,
std
::
size_t
>
get_launch_params
()
const
{
...
...
@@ -596,7 +661,7 @@ struct mlir_program
value
::
binary
get_binary
()
const
{
in
t
size
=
0
;
size_
t
size
=
0
;
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
nullptr
);
value
::
binary
result
(
size
);
if
(
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
reinterpret_cast
<
char
*>
(
result
.
data
())))
...
...
@@ -604,14 +669,52 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
void
set_tuning
(
const
value
&
v
)
MIGRAPHX_TIDY_CONST
{
const
auto
*
str
=
v
.
if_string
();
if
(
str
==
nullptr
)
MIGRAPHX_THROW
(
"mlir tuning solutions must be strings"
);
if
(
not
mlirRockTuningSetFromStr
(
mmodule
.
get
(),
make_mlir_string_ref
(
*
str
)))
MIGRAPHX_THROW
(
"Failed setting tuning key: "
+
*
str
);
}
tuning_config
get_tuning_config
()
MIGRAPHX_TIDY_CONST
{
tuning_config
tc
;
run_high_level_pipeline
();
mlir_tuning_space
params
{
mlirRockTuningSpaceCreate
(
mmodule
.
get
(),
RocmlirTuningParamSetKindFull
)};
for
(
auto
i
:
range
(
mlirRockTuningGetNumParams
(
params
.
get
())))
{
mlir_tuning_param
param
{
mlirRockTuningParamCreate
()};
if
(
not
mlirRockTuningParamGet
(
params
.
get
(),
i
,
param
.
get
()))
MIGRAPHX_THROW
(
"Incorrect mlir tuning parameter: "
+
std
::
to_string
(
i
));
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
perf_key
;
size_t
perf_key_bytes
=
mlirRockTuningParamToString
(
param
.
get
(),
perf_key
.
data
(),
perf_key
.
size
());
if
(
perf_key_bytes
>
perf_key
.
size
())
MIGRAPHX_THROW
(
"Tuning perf key was "
+
std
::
to_string
(
perf_key_bytes
)
+
" bytes and thus too long"
);
tc
.
solutions
.
emplace_back
(
perf_key
.
begin
(),
perf_key
.
begin
()
+
perf_key_bytes
);
}
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
tuning_key
;
size_t
tuning_key_bytes
=
mlirRockTuningGetKey
(
mmodule
.
get
(),
tuning_key
.
data
(),
tuning_key
.
size
());
if
(
tuning_key_bytes
>
tuning_key
.
size
())
MIGRAPHX_THROW
(
"Tuning table key was "
+
std
::
to_string
(
tuning_key_bytes
)
+
" bytes and thus too long"
);
tc
.
problem
=
std
::
string
(
tuning_key
.
begin
(),
tuning_key
.
begin
()
+
tuning_key_bytes
);
return
tc
;
}
std
::
string
get_tune_params
(
bool
xdlops
)
const
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
// This function appends to tuning cfg file that could be
// used with rocMLIR tuning scripts.
void
dump_tuning_cfg
(
const
char
*
prob_config
)
const
void
dump_tuning_cfg
(
const
std
::
string
&
prob_config
)
const
{
std
::
string
tuning_cfg_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_CFG
{});
if
(
!
tuning_cfg_path
.
empty
())
if
(
not
tuning_cfg_path
.
empty
())
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
prob_config
,
'\t'
);
std
::
string
prob
=
tokens
[
1
];
...
...
@@ -628,46 +731,66 @@ struct mlir_program
}
}
static
mlir_tuning_table
create
_tuning_table
()
static
std
::
pair
<
mlir_tuning_table
,
bool
>
load
_tuning_table
()
{
mlir_tuning_table
tuning_table
{
mlirRockTuningTableCreate
()};
bool
found_table
=
false
;
std
::
string
tuning_db_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_DB
{});
if
(
!
tuning_db_path
.
empty
())
if
(
not
tuning_db_path
.
empty
())
{
std
::
ifstream
tuning_db_tsv
(
tuning_db_path
);
if
(
tuning_db_tsv
)
{
found_table
=
true
;
std
::
string
line
;
while
(
std
::
getline
(
tuning_db_tsv
,
line
))
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
line
,
'\t'
);
std
::
string
arch
=
tokens
[
0
];
std
::
string
prob
=
tokens
[
1
];
std
::
string
perf
=
tokens
[
2
];
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
prob
);
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
key
.
c_str
(),
perf
.
c_str
(),
1.0
);
std
::
string
num_cu
=
tokens
[
1
];
std
::
string
prob
=
tokens
[
2
];
std
::
string
perf
=
tokens
[
3
];
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
num_cu
).
append
(
"
\t
"
).
append
(
prob
);
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
make_mlir_string_ref
(
key
),
make_mlir_string_ref
(
perf
),
1.0
);
}
}
}
else
{
found_table
=
false
;
std
::
cerr
<<
"WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
"optimal performance."
<<
std
::
endl
;
}
return
tuning_table
;
return
std
::
make_pair
(
std
::
move
(
tuning_table
),
found_table
)
;
}
bool
get_module_tuned
()
const
{
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
static
std
::
pair
<
mlir_tuning_table
,
bool
>
tuning_table
=
load_tuning_table
();
if
(
not
mlirRockTuningSetFromTable
(
tuning_table
.
first
.
get
(),
mmodule
.
get
()))
{
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
prob_config
;
size_t
prob_config_bytes
=
mlirRockTuningGetKey
(
mmodule
.
get
(),
prob_config
.
data
(),
prob_config
.
size
());
if
(
prob_config_bytes
>=
prob_config
.
size
())
{
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
std
::
stringstream
key
(
prob_config
);
std
::
cerr
<<
"fails to set param on"
<<
prob_config
<<
std
::
endl
;
dump_tuning_cfg
(
prob_config
);
std
::
cerr
<<
"MLIR tuning key overflowed buffer, needed "
<<
prob_config_bytes
<<
" bytes"
<<
std
::
endl
;
return
false
;
}
std
::
string
prob_config_str
(
prob_config
.
begin
(),
prob_config
.
begin
()
+
prob_config_bytes
);
if
(
tuning_table
.
second
)
{
std
::
cerr
<<
"NOTE: MLIR tuning table did not include a key for "
<<
prob_config_str
<<
std
::
endl
;
}
dump_tuning_cfg
(
prob_config_str
);
return
false
;
}
return
true
;
...
...
@@ -678,7 +801,8 @@ struct mlir_program
mlir_module
mmodule
;
problem_params
pp
;
std
::
deque
<
std
::
string
>
strings
{};
std
::
string
target_arch
;
std
::
string
target_arch
=
""
;
std
::
size_t
num_cu
=
0
;
std
::
string
sym_name
;
};
...
...
@@ -690,14 +814,14 @@ std::string dump_mlir(const module& m)
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
}
void
adjust_param_shapes
(
module
&
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
void
adjust_param_shapes
(
module
&
m
,
const
std
::
vector
<
shape
>&
inputs
)
{
auto
names
=
m
.
get_parameter_names
();
std
::
sort
(
names
.
begin
(),
names
.
end
());
for
(
auto
i
:
range
(
names
.
size
()))
{
const
auto
&
name
=
names
[
i
];
const
auto
&
input
=
inputs
[
i
]
->
get_shape
()
;
const
auto
&
input
=
inputs
[
i
];
auto
param
=
m
.
get_parameter
(
name
);
if
(
input
.
standard
())
continue
;
...
...
@@ -735,23 +859,25 @@ void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs)
}
}
code_object_op
compile_mlir
(
const
context
&
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
code_object_op
compile_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
,
const
value
&
solution
)
{
adjust_param_shapes
(
m
,
inputs
);
adjust_param_shapes
(
m
,
to_shapes
(
inputs
)
)
;
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
// set mutex while llvm thread support is disabled.
static
std
::
mutex
g_mlirc_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_mlirc_mutex
);
mlir_program
mp
;
mp
.
find_target
(
);
mp
.
set_gpu_properties
(
migraphx_ctx
);
mp
.
parse
(
m
);
auto
mod_op
=
mlirModuleGetOperation
(
mp
.
mmodule
.
get
());
if
(
trace
)
std
::
cout
<<
mlir_print
(
&
mlirOperationPrint
,
mod_op
)
<<
std
::
endl
;
auto
co
=
mp
.
compile
();
auto
co
=
mp
.
compile
(
solution
);
co
.
expected_inputs
=
to_shapes
(
inputs
);
co
.
output
=
m
.
get_output_shapes
().
front
();
return
co
;
}
...
...
@@ -772,6 +898,17 @@ instruction_ref insert_mlir(module& m,
return
m
.
insert_instruction
(
ins
,
co
,
refs
);
}
tuning_config
get_tuning_config_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
shape
>&
inputs
)
{
adjust_param_shapes
(
m
,
inputs
);
mlir_program
mp
;
mp
.
set_gpu_properties
(
migraphx_ctx
);
mp
.
parse
(
m
);
return
mp
.
get_tuning_config
();
}
#else
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
...
...
@@ -783,20 +920,27 @@ void use(T&)
// Disabling clang-tidy warning on non-real useage.
// NOLINTBEGIN(performance-unnecessary-value-param)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
,
const
value
&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
instruction_ref
// cppcheck-suppress funcArgNamesDifferent
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
{
use
(
co
);
use
(
m
);
return
m
.
end
();
}
tuning_config
get_tuning_config_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
shape
>&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
#endif
}
// namespace gpu
...
...
src/targets/gpu/rocblas.cpp
View file @
23cb7917
...
...
@@ -47,32 +47,24 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
return
rb
;
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
bool
get_compute_fp32_flag
()
{
bool
compute_fp32
=
false
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
compute_fp32
=
true
;
#endif
return
compute_fp32
;
return
(
starts_with
(
device_name
,
"gfx9"
)
and
device_name
>=
"gfx908"
);
}
bool
get_int8_x4_format
(
context
&
ctx
)
{
bool
int8_x4_format
=
true
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
#if ROCBLAS_VERSION_MAJOR >= 3
(
void
)(
ctx
);
return
false
;
#else
// int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
// v3.0 and will be removed in v4.0
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
)
;
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
#endif
return
int8_x4_format
;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/target.cpp
View file @
23cb7917
...
...
@@ -57,6 +57,7 @@
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/fuse_ck.hpp>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/prefuse_ops.hpp>
...
...
@@ -72,9 +73,12 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_REDUCE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NHWC
)
#ifndef _WIN32
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_CK
)
#endif
struct
id_pass
{
std
::
string
name
()
const
{
return
"id"
;
}
...
...
@@ -98,16 +102,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
unsupported_types
.
erase
(
shape
::
type_t
::
bool_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
int8_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
uint8_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
int32_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
tuple_type
);
// clang-format off
return
{
enable_pass
(
options
.
split_single_dyn_dim
,
split_single_dyn_dim
{}
)
,
enable_pass
(
options
.
split_single_dyn_dim
,
dead_code_elimination
{}
)
,
split_single_dyn_dim
{},
dead_code_elimination
{},
normalize_ops
{},
dead_code_elimination
{},
simplify_qdq
{},
rewrite_quantization
{},
enable_pass
(
not
mlir_enabled
(),
rewrite_quantization
{}
)
,
dead_code_elimination
{},
eliminate_data_type
{
unsupported_types
,
shape
::
type_t
::
float_type
},
simplify_reshapes
{},
...
...
@@ -121,7 +126,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
inline_module
{},
rewrite_pooling
{},
dead_code_elimination
{},
rewrite_gelu
{},
enable_pass
(
options
.
fast_math
,
rewrite_gelu
{}
)
,
optimize_module
{},
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_NHWC
{}),
layout_nhwc
{}),
dead_code_elimination
{},
...
...
@@ -129,11 +134,15 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
auto_contiguous
{},
optimize_module
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}
)
,
fuse_pointwise
{},
dead_code_elimination
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_REDUCE_FUSION
{}),
fuse_reduce
{}),
dead_code_elimination
{},
fuse_mlir
{
&
ctx
},
#ifndef _WIN32
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_CK
{}),
fuse_ck
{}),
#endif
dead_code_elimination
{},
enable_pass
(
mlir_enabled
(),
fuse_mlir
{
&
ctx
}),
dead_code_elimination
{},
lowering
{
&
ctx
,
options
.
offload_copy
},
eliminate_contiguous
{
"gpu::contiguous"
},
...
...
@@ -150,7 +159,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
adjust_allocation
{
gpu_allocation_model
{}},
dead_code_elimination
{},
compile_ops
{
&
ctx
},
compile_ops
{
&
ctx
,
options
.
exhaustive_tune
},
dead_code_elimination
{},
promote_literals
{},
dead_code_elimination
{},
...
...
src/targets/gpu/
driver/perf
.cpp
→
src/targets/gpu/
time_op
.cpp
View file @
23cb7917
...
...
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/
driver/perf
.hpp>
#include <migraphx/gpu/
time_op
.hpp>
#include <migraphx/context.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/time.hpp>
...
...
@@ -30,12 +30,11 @@
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
driver
{
std
::
vector
<
argument
>
generate_arguments
(
const
std
::
vector
<
shape
>&
shapes
,
unsigned
long
seed
=
0
)
{
std
::
vector
<
argument
>
args
;
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
std
::
back_inserter
(
args
),
[
&
](
auto
&
s
)
{
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
std
::
back_inserter
(
args
),
[
&
](
const
auto
&
s
)
{
return
to_gpu
(
generate_argument
(
s
,
seed
++
));
});
return
args
;
...
...
@@ -69,7 +68,6 @@ time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
return
std
::
make_pair
(
host_time
/
n
,
device_time
/
n
);
}
}
// namespace driver
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/ref/CMakeLists.txt
View file @
23cb7917
...
...
@@ -37,6 +37,8 @@ target_link_libraries(migraphx_ref PUBLIC migraphx)
target_include_directories
(
migraphx_ref PRIVATE
${
BLAZE_INCLUDE
}
)
target_compile_definitions
(
migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS
)
migraphx_generate_export_header
(
migraphx_ref
)
rocm_install_targets
(
TARGETS migraphx_ref
INCLUDE
...
...
src/targets/ref/include/migraphx/ref/context.hpp
View file @
23cb7917
...
...
@@ -25,6 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#include <migraphx/config.hpp>
#include <migraphx/ref/export.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/ref/include/migraphx/ref/lowering.hpp
View file @
23cb7917
...
...
@@ -24,14 +24,14 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <migraphx/ref/context.hpp>
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
ref
{
struct
lowering
struct
MIGRAPHX_REF_EXPORT
lowering
{
std
::
string
name
()
const
{
return
"ref::lowering"
;
}
void
apply
(
module
&
m
)
const
;
...
...
src/targets/ref/include/migraphx/ref/target.hpp
View file @
23cb7917
...
...
@@ -35,7 +35,7 @@ inline namespace MIGRAPHX_INLINE_NS {
struct
pass
;
namespace
ref
{
struct
target
struct
MIGRAPHX_REF_EXPORT
target
{
std
::
string
name
()
const
;
std
::
vector
<
pass
>
get_passes
(
migraphx
::
context
&
ctx
,
const
compile_options
&
)
const
;
...
...
src/targets/ref/lowering.cpp
View file @
23cb7917
...
...
@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
...
...
src/tf/CMakeLists.txt
View file @
23cb7917
...
...
@@ -42,8 +42,9 @@ target_compile_options(tf-proto PRIVATE -w)
target_link_libraries
(
tf-proto PRIVATE
${
PROTOBUF_LIBRARY
}
)
set_target_properties
(
tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On
)
file
(
GLOB TF_SRCS
${
CONFIGURE_DEPENDS
}
*.cpp
)
file
(
GLOB TF_SRCS CONFIGURE_DEPENDS *.cpp
)
add_library
(
migraphx_tf
${
TF_SRCS
}
)
migraphx_generate_export_header
(
migraphx_tf
)
target_include_directories
(
migraphx_tf PRIVATE include
)
set_target_properties
(
migraphx_tf PROPERTIES EXPORT_NAME tf
)
rocm_set_soversion
(
migraphx_tf
${
MIGRAPHX_SO_VERSION
}
)
...
...
src/tf/op_parser.cpp
View file @
23cb7917
...
...
@@ -46,6 +46,7 @@ std::vector<std::string> get_op_parsers()
op_parser_map
().
end
(),
std
::
back_inserter
(
result
),
[
&
](
auto
&&
p
)
{
return
p
.
first
;
});
std
::
sort
(
result
.
begin
(),
result
.
end
());
return
result
;
}
...
...
src/tf/parse_batchnorm.cpp
View file @
23cb7917
...
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
// unsqueeze tensors of shape (C) to broadcast correctly
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
scale_unsqueeze
=
...
...
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto
var_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
auto
numer
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
x_sub_mean
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
denom
=
info
.
add_
broadcastable_binary_op
(
"pow"
,
var_eps
,
rt
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"
div"
,
numer
,
denom
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
scale_unsqueeze
);
auto
rsqrt
=
info
.
add_
instruction
(
make_op
(
"rsqrt"
)
,
var_eps
);
auto
mul0
=
info
.
add_broadcastable_binary_op
(
"
mul"
,
scale_unsqueeze
,
rsqrt
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
x_sub_mean
,
mul0
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
}
};
...
...
src/tf/tf.cpp
View file @
23cb7917
...
...
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include <migraphx/tf/tf_parser.hpp>
#include <migraphx/tf/op_parser.hpp>
#include <iostream>
#include <fstream>
#include <unordered_map>
...
...
@@ -62,5 +63,7 @@ program parse_tf(const std::string& name, const tf_options& options)
return
std
::
move
(
parser
.
prog
);
}
std
::
vector
<
std
::
string
>
get_tf_operators
()
{
return
tf
::
get_op_parsers
();
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/tf/tf_parser.cpp
View file @
23cb7917
...
...
@@ -338,7 +338,7 @@ void tf_parser::parse_node(const std::string& name)
std
::
string
input_name
=
input
;
// if input has trailing `:0` index then remove it
auto
multi_out_idx
=
input
.
find
(
':'
);
if
(
multi_out_idx
!=
std
::
string
::
npos
&&
input
.
substr
(
multi_out_idx
+
1
)
==
"0"
)
if
(
multi_out_idx
!=
std
::
string
::
npos
and
input
.
substr
(
multi_out_idx
+
1
)
==
"0"
)
{
input_name
=
input
.
substr
(
0
,
multi_out_idx
);
}
...
...
src/value.cpp
View file @
23cb7917
...
...
@@ -28,6 +28,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp>
#include <migraphx/optional.hpp>
#include <migraphx/hash.hpp>
#include <unordered_map>
#include <utility>
...
...
@@ -284,7 +285,7 @@ bool value::contains(const std::string& pkey) const
}
std
::
size_t
value
::
size
()
const
{
auto
*
a
=
if_array_impl
(
x
);
const
auto
*
a
=
if_array_impl
(
x
);
if
(
a
==
nullptr
)
return
0
;
return
a
->
size
();
...
...
@@ -519,6 +520,38 @@ std::ostream& operator<<(std::ostream& os, const value& d)
return
os
;
}
template
<
class
T
>
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
const
T
&
x
)
{
std
::
size_t
h
=
hash_value
(
key
);
hash_combine
(
h
,
x
);
return
h
;
}
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
std
::
nullptr_t
)
{
return
hash_value
(
key
);
}
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
const
std
::
vector
<
value
>&
x
)
{
std
::
size_t
h
=
hash_value
(
key
);
for
(
const
auto
&
v
:
x
)
hash_combine
(
h
,
v
);
return
h
;
}
std
::
size_t
value_hash
(
const
std
::
string
&
key
,
const
value
::
binary
&
x
)
{
std
::
size_t
h
=
hash_value
(
key
);
for
(
const
auto
&
v
:
x
)
hash_combine
(
h
,
v
);
return
h
;
}
std
::
size_t
value
::
hash
()
const
{
std
::
size_t
h
=
0
;
this
->
visit_value
([
&
](
const
auto
&
a
)
{
h
=
value_hash
(
this
->
get_key
(),
a
);
});
return
h
;
}
void
value
::
debug_print
(
bool
show_type
)
const
{
if
(
show_type
)
...
...
src/verify_args.cpp
View file @
23cb7917
...
...
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
bool
passed
=
true
;
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
double
error
;
passed
=
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
passed
=
verify
::
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
if
(
not
passed
)
{
// TODO: Check for nans
...
...
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
if
(
target
.
size
()
<
32
)
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
auto
mxdiff
=
max_diff
(
ref
,
target
);
auto
mxdiff
=
verify
::
max_diff
(
ref
,
target
);
std
::
cout
<<
"Max diff: "
<<
mxdiff
<<
std
::
endl
;
auto
idx
=
mismatch_idx
(
ref
,
target
,
float_equal
);
if
(
idx
<
range_distance
(
ref
))
auto
idx
=
verify
::
mismatch_idx
(
ref
,
target
,
float_equal
);
if
(
idx
<
verify
::
range_distance
(
ref
))
{
std
::
cout
<<
"Mismatch at "
<<
idx
<<
": "
<<
ref
[
idx
]
<<
" != "
<<
target
[
idx
]
<<
std
::
endl
;
}
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
...
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
}
else
{
if
(
range_zero
(
ref
))
if
(
verify
::
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
if
(
verify
::
range_zero
(
target
))
std
::
cout
<<
"Target data is all zeros"
<<
std
::
endl
;
// auto mxdiff = max_diff(ref, target);
// std::cout << "Max diff: " << mxdiff << std::endl;
// auto idx = mismatch_idx(ref, target, float_equal);
// if(idx < range_distance(ref))
// if(idx <
verify::
range_distance(ref))
// {
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// << std::endl;
// }
auto
ref_nan_idx
=
find_idx
(
ref
,
not_finite
);
auto
ref_nan_idx
=
find_idx
(
ref
,
verify
::
not_finite
);
if
(
ref_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in ref at "
<<
ref_nan_idx
<<
": "
<<
ref
[
ref_nan_idx
]
<<
std
::
endl
;
auto
target_nan_idx
=
find_idx
(
target
,
not_finite
);
auto
target_nan_idx
=
find_idx
(
target
,
verify
::
not_finite
);
if
(
target_nan_idx
>=
0
)
std
::
cout
<<
"Non finite number found in target at "
<<
target_nan_idx
<<
": "
<<
target
[
target_nan_idx
]
<<
std
::
endl
;
...
...
test/CMakeLists.txt
View file @
23cb7917
...
...
@@ -24,8 +24,6 @@
cmake_policy
(
SET CMP0057 NEW
)
include
(
CTest
)
find_package
(
Threads REQUIRED
)
include
(
ProcessorCount
)
ProcessorCount
(
N
)
...
...
@@ -100,21 +98,15 @@ endfunction()
function
(
add_test_executable TEST_NAME
)
add_executable
(
${
TEST_NAME
}
EXCLUDE_FROM_ALL
${
ARGN
}
)
target_link_libraries
(
${
TEST_NAME
}
${
CMAKE_THREAD_LIBS_INIT
}
)
# Cmake does not add flags correctly for gcc
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"GNU"
)
set_target_properties
(
${
TEST_NAME
}
PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread
)
endif
()
set
(
TEST_COMMAND
${
TEST_NAME
}
)
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_ref
)
target_link_libraries
(
${
TEST_NAME
}
Threads::Threads
migraphx migraphx_onnx migraphx_ref
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
endfunction
(
add_test_executable
)
file
(
GLOB TESTS
${
CONFIGURE_DEPENDS
}
*.cpp
)
file
(
GLOB TESTS CONFIGURE_DEPENDS *.cpp
)
foreach
(
TEST
${
TESTS
}
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
...
...
@@ -124,7 +116,7 @@ endforeach()
if
(
MIGRAPHX_ENABLE_GPU
)
# gpu tests
file
(
GLOB GPU_TESTS
${
CONFIGURE_DEPENDS
}
gpu/*.cpp
)
file
(
GLOB GPU_TESTS CONFIGURE_DEPENDS gpu/*.cpp
)
foreach
(
TEST
${
GPU_TESTS
}
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
...
...
@@ -134,13 +126,16 @@ if(MIGRAPHX_ENABLE_GPU)
COST 10
RESOURCE_LOCK gpu
)
if
(
MIGRAPHX_USE_HIPRTC
)
target_compile_definitions
(
test_gpu_
${
BASE_NAME
}
PUBLIC -DMIGRAPHX_USE_HIPRTC
)
endif
()
target_link_libraries
(
test_gpu_
${
BASE_NAME
}
migraphx_gpu migraphx_kernels
)
endforeach
()
endif
()
if
(
MIGRAPHX_ENABLE_FPGA
)
# fpga tests
file
(
GLOB FPGA_TESTS
${
CONFIGURE_DEPENDS
}
fpga/*.cpp
)
file
(
GLOB FPGA_TESTS CONFIGURE_DEPENDS fpga/*.cpp
)
foreach
(
TEST
${
FPGA_TESTS
}
)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
...
...
@@ -187,12 +182,36 @@ if(MIGRAPHX_ENABLE_PYTHON)
add_subdirectory
(
py
)
endif
()
# multitarget test
if
(
MIGRAPHX_ENABLE_GPU AND MIGRAPHX_ENABLE_CPU AND MIGRAPHX_ENABLE_FPGA
)
set
(
TEST_MULTI_TARGET_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
/multi_target
)
file
(
GLOB MULTI_TARGET_TESTS CONFIGURE_DEPENDS
${
TEST_MULTI_TARGET_DIR
}
/*.cpp
)
foreach
(
MULTI_TARGET_TEST
${
MULTI_TARGET_TESTS
}
)
get_filename_component
(
BASE_NAME
${
MULTI_TARGET_TEST
}
NAME_WE
)
set
(
TEST_NAME test_
${
BASE_NAME
}
)
add_executable
(
${
TEST_NAME
}
${
MULTI_TARGET_TEST
}
)
rocm_clang_tidy_check
(
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_tf migraphx_all_targets
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
> WORKING_DIRECTORY
${
TEST_MULTI_TARGET_DIR
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
endforeach
()
endif
()
function
(
test_header NAME HEADER
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
"#include <
${
HEADER
}
>
\n
int main() {}
\n
"
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
"
#include <
${
HEADER
}
>
int main() {}
\n
"
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
"#include <
${
HEADER
}
>
\n
"
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
"
#include <
${
HEADER
}
>
#if defined(min) || defined(max) || defined(near) || defined(far)
#error
\"
Do not include windows.h in header files
\"
#endif
\n
"
)
add_test_executable
(
${
NAME
}
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
...
...
@@ -201,14 +220,14 @@ function(test_header NAME HEADER)
endfunction
()
function
(
test_headers PREFIX
)
file
(
GLOB HEADERS
${
CONFIGURE_DEPENDS
}
${
ARGN
}
)
file
(
GLOB HEADERS CONFIGURE_DEPENDS
${
ARGN
}
)
foreach
(
HEADER
${
HEADERS
}
)
file
(
RELATIVE_PATH HEADER_REL
${
CMAKE_SOURCE_DIR
}
${
HEADER
}
)
string
(
MAKE_C_IDENTIFIER
${
HEADER_REL
}
TEST_NAME
)
get_filename_component
(
BASE_NAME
${
HEADER
}
NAME_WE
)
test_header
(
header_
${
TEST_NAME
}
${
PREFIX
}
/
${
BASE_NAME
}
.hpp
)
target_link_libraries
(
header_
${
TEST_NAME
}
migraphx_all_targets
)
target_link_libraries
(
header_
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_tf
migraphx_all_targets
)
endforeach
()
endfunction
()
...
...
@@ -225,3 +244,4 @@ if(MIGRAPHX_ENABLE_FPGA)
test_headers
(
migraphx/fpga
${
CMAKE_SOURCE_DIR
}
/src/targets/fpga/include/migraphx/fpga/*.hpp
)
endif
()
Prev
1
…
13
14
15
16
17
18
19
20
21
…
23
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment