Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
2751143d
Commit
2751143d
authored
Apr 05, 2023
by
Brian Pickrell
Browse files
recreated branch by hand merge with develop branch
parent
30af1697
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
741 additions
and
191 deletions
+741
-191
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+12
-1
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+488
-157
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+25
-5
src/targets/gpu/include/migraphx/gpu/gemm.hpp
src/targets/gpu/include/migraphx/gpu/gemm.hpp
+49
-12
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp
+62
-15
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
+2
-1
test/gpu/gemm_tune.cpp
test/gpu/gemm_tune.cpp
+103
-0
No files found.
src/targets/gpu/CMakeLists.txt
View file @
2751143d
# ####################################################################################
# ####################################################################################
# The MIT License (MIT)
# The MIT License (MIT)
#
#
# Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# of this software and associated documentation files (the "Software"), to deal
...
@@ -215,10 +215,14 @@ else()
...
@@ -215,10 +215,14 @@ else()
endif
()
endif
()
# Check miopen find mode api
# Check miopen find mode api
include
(
CheckLibraryExists
)
include
(
CheckLibraryExists
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
get_target_property
(
ROCBLAS_LOCATION roc::rocblas LOCATION
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
# Beta API for automated GEMM tuning
check_library_exists
(
roc::rocblas
"rocblas_gemm_ex_get_solutions"
"
${
ROCBLAS_LOCATION
}
"
HAS_ROCBLAS_BETA_FEATURES_API
)
set
(
MIGRAPHX_USE_FIND_2_API
"
${
HAS_FIND_2_API
}
"
CACHE BOOL
""
)
set
(
MIGRAPHX_USE_FIND_2_API
"
${
HAS_FIND_2_API
}
"
CACHE BOOL
""
)
...
@@ -236,6 +240,13 @@ else()
...
@@ -236,6 +240,13 @@ else()
message
(
STATUS
"MIOpen does not have find mode api"
)
message
(
STATUS
"MIOpen does not have find mode api"
)
endif
()
endif
()
if
(
HAS_ROCBLAS_BETA_FEATURES_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS
)
message
(
STATUS
"MIGraphx is using Beta API of rocBLAS"
)
else
()
message
(
STATUS
"rocBLAS does not have Beta API"
)
endif
()
target_link_libraries
(
migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas
)
target_link_libraries
(
migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas
)
target_link_libraries
(
migraphx_gpu PRIVATE migraphx_device migraphx_kernels
)
target_link_libraries
(
migraphx_gpu PRIVATE migraphx_device migraphx_kernels
)
...
...
src/targets/gpu/gemm_impl.cpp
View file @
2751143d
This diff is collapsed.
Click to expand it.
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
2751143d
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
...
@@ -35,6 +36,9 @@
...
@@ -35,6 +36,9 @@
#include <unordered_map>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_CONV_TUNING
);
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
...
@@ -163,6 +167,20 @@ struct miopen_convolution
...
@@ -163,6 +167,20 @@ struct miopen_convolution
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
std
::
size_t
workspace_size
=
0
;
std
::
size_t
workspace_size
=
0
;
// TODO: Using an environment variable to disable convolution tuning while still allowing
// other (i.e. GEMM) tuning with --exhaustive-tune, is a workaround for a problem in which
// convolution tuning takes multiple hours.
const
bool
convo_tune_enabled
=
enabled
(
MIGRAPHX_ENABLE_CONV_TUNING
{});
if
(
ctx
.
get_exhaustive_tune_flag
()
and
not
convo_tune_enabled
)
{
std
::
cerr
<<
"WARNING: MIGraphX convolution tuning not enabled. To run tuning, set both the "
"env "
"var MIGRAPHX_ENABLE_CONV_TUNING and the command argument --exhaustive-tune."
<<
std
::
endl
;
}
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
{
{
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
auto
conv_problem
=
make_obj
<
miopen_problem
>
(
...
@@ -174,9 +192,10 @@ struct miopen_convolution
...
@@ -174,9 +192,10 @@ struct miopen_convolution
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
solution_ptr
=
find_solution
(
solution_ptr
=
find_solution
(
miopen_stream_handle
,
miopen_stream_handle
,
conv_problem
.
get
(),
ctx
.
get_exhaustive_tune_flag
());
conv_problem
.
get
(),
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
ctx
.
get_exhaustive_tune_flag
()
and
convo_tune_enabled
);
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
...
@@ -233,7 +252,8 @@ struct miopen_convolution
...
@@ -233,7 +252,8 @@ struct miopen_convolution
&
perf
,
&
perf
,
workspace
.
implicit
(),
workspace
.
implicit
(),
workspace_size
,
workspace_size
,
ctx
.
get_exhaustive_tune_flag
());
ctx
.
get_exhaustive_tune_flag
()
and
convo_tune_enabled
);
if
(
status
!=
miopenStatusSuccess
)
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : find convolution failed"
);
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : find convolution failed"
);
algo
=
perf
.
fwd_algo
;
algo
=
perf
.
fwd_algo
;
...
...
src/targets/gpu/include/migraphx/gpu/gemm.hpp
View file @
2751143d
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -40,9 +40,8 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -40,9 +40,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
struct
context
;
struct
context
;
void
blas_shape
(
const
shape
&
s
);
shape
transpose_batch
(
const
shape
&
s
,
unsigned
trans_batch
);
shape
transpose_batch
(
const
shape
&
s
,
unsigned
trans_batch
);
void
blas_shape
(
const
shape
&
s
);
template
<
class
Op
>
template
<
class
Op
>
struct
rocblas_gemm
struct
rocblas_gemm
...
@@ -53,6 +52,7 @@ struct rocblas_gemm
...
@@ -53,6 +52,7 @@ struct rocblas_gemm
bool
int8_x4_format
=
true
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
bool
compute_fp32
=
false
;
unsigned
trans_batch
=
0
;
unsigned
trans_batch
=
0
;
int32_t
solution_idx
=
0
;
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
...
@@ -62,7 +62,8 @@ struct rocblas_gemm
...
@@ -62,7 +62,8 @@ struct rocblas_gemm
f
(
self
.
beta
,
"beta"
),
f
(
self
.
beta
,
"beta"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
compute_fp32
,
"compute_fp32"
),
f
(
self
.
compute_fp32
,
"compute_fp32"
),
f
(
self
.
trans_batch
,
"trans_batch"
)));
f
(
self
.
trans_batch
,
"trans_batch"
),
f
(
self
.
solution_idx
,
"solution_idx"
)));
}
}
std
::
string
name
()
const
std
::
string
name
()
const
...
@@ -113,17 +114,19 @@ struct rocblas_gemm
...
@@ -113,17 +114,19 @@ struct rocblas_gemm
{
{
if
(
this
->
name
()
==
"gpu::gemm"
)
if
(
this
->
name
()
==
"gpu::gemm"
)
{
{
gemm
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
);
gemm_compute
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
,
solution_idx
);
}
}
else
else
{
{
gemm
(
ctx
,
gemm_compute
(
ctx
,
output_shape
,
output_shape
,
args
,
args
,
int32_t
(
alpha
),
int32_t
(
alpha
),
int32_t
(
beta
),
int32_t
(
beta
),
int8_x4_format
,
int8_x4_format
,
compute_fp32
);
compute_fp32
,
solution_idx
);
}
}
return
args
.
back
();
return
args
.
back
();
}
}
...
@@ -132,6 +135,40 @@ struct rocblas_gemm
...
@@ -132,6 +135,40 @@ struct rocblas_gemm
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
input_shapes
)
{
#ifdef ROCBLAS_BETA_FEATURES_API
if
(
ctx
.
get_exhaustive_tune_flag
()
&&
solution_idx
==
0
)
{
if
(
this
->
name
()
==
"gpu::gemm"
)
{
solution_idx
=
gemm_finalize
(
ctx
,
output_shape
,
input_shapes
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
,
solution_idx
);
}
else
{
solution_idx
=
gemm_finalize
(
ctx
,
output_shape
,
input_shapes
,
int32_t
(
alpha
),
int32_t
(
beta
),
int8_x4_format
,
compute_fp32
,
solution_idx
);
}
}
#else
// suppress compiler warnings
(
void
)
ctx
,
(
void
)
output_shape
,
(
void
)
input_shapes
;
#endif
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp
View file @
2751143d
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -24,28 +24,75 @@
...
@@ -24,28 +24,75 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_GEMM_IMPL_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_GEMM_IMPL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GEMM_IMPL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GEMM_IMPL_HPP
#include <iterator>
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/time.hpp>
using
milliseconds
=
std
::
chrono
::
duration
<
double
,
std
::
milli
>
;
using
microseconds
=
std
::
chrono
::
duration
<
double
,
std
::
micro
>
;
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
void
gemm
(
context
&
ctx
,
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
const
shape
&
output_shape
,
using
flag_type
=
rocblas_gemm_flags
;
const
std
::
vector
<
argument
>&
args
,
#else
float
alpha
,
using
flag_type
=
int
;
float
beta
,
#endif
bool
int8_x4_format
,
bool
compute_fp32
);
/**
void
gemm
(
context
&
ctx
,
* @brief Templated implementations of the compute() and finalize() methods of the Gemm operator.
const
shape
&
output_shape
,
* For each function there are overloads using either float or int32_t for the arguments
const
std
::
vector
<
argument
>&
args
,
* alpha and beta.
int32_t
alpha
,
*
int32_t
beta
,
* @param ctx .
bool
int8_x4_format
,
* @param output_shape .
bool
compute_fp32
);
* @param args .
* @param alpha .
* @param beta .
* @param int8_x4_format .
* @param compute_fp32 .
*/
void
gemm_compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
,
float
alpha
,
float
beta
,
bool
int8_x4_format
,
bool
compute_fp32
,
int32_t
solution_idx
);
void
gemm_compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
,
int32_t
alpha
,
int32_t
beta
,
bool
int8_x4_format
,
bool
compute_fp32
,
int32_t
solution_idx
);
int32_t
gemm_finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
input_shapes
,
float
alpha
,
float
beta
,
bool
int8_x4_format
,
bool
compute_fp32
);
int32_t
gemm_finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
input_shapes
,
int32_t
alpha
,
int32_t
beta
,
bool
int8_x4_format
,
bool
compute_fp32
,
int32_t
solution_idx
);
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
View file @
2751143d
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
// ROCBLAS_BETA_FEATURES_API is defined by CMake, if available.
#include <rocblas/rocblas.h>
#include <rocblas/rocblas.h>
namespace
migraphx
{
namespace
migraphx
{
...
...
test/gpu/gemm_tune.cpp
0 → 100644
View file @
2751143d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iostream>
#include <vector>
#include <migraphx/gpu/gemm.hpp>
#include <hip/hip_runtime_api.h>
#include <migraphx/gpu/target.hpp>
#include <migraphx/verify.hpp>
#include <test.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
// includes needed for run_lowering
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/pass_manager.hpp>
// Abbreviated lowering; we don't need the usual cleanup passes for this test
void
run_lowering
(
migraphx
::
program
&
p
,
bool
offload_copy
=
false
)
{
auto
ctx
=
migraphx
::
gpu
::
context
{};
migraphx
::
run_passes
(
*
p
.
get_main_module
(),
{
migraphx
::
auto_contiguous
{},
migraphx
::
gpu
::
lowering
{
&
ctx
,
offload_copy
}});
}
/**
* Tests the automatic GEMM tuning feature. In the finalize() method of the gemm op,
* rocBLAS API functions are called to quickly benchmark all the GEMM solutions
* available in the currently installed rocBLAS library and choose the index of the fastest.
*/
TEST_CASE
(
gemm_tune_with_rocblas
)
{
migraphx
::
program
p
;
auto
*
mm
=
p
.
get_main_module
();
migraphx
::
shape
sa
{
migraphx
::
shape
::
float_type
,
{
4
,
2
}};
migraphx
::
shape
sb
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
auto
a
=
mm
->
add_parameter
(
"a"
,
sa
);
auto
b
=
mm
->
add_parameter
(
"b"
,
sb
);
migraphx
::
operation
dot_op
=
migraphx
::
make_op
(
"dot"
);
mm
->
add_instruction
(
dot_op
,
a
,
b
);
// lowering adds gemm implementation for dot operator
run_lowering
(
p
);
migraphx
::
target
gpu_t
=
migraphx
::
gpu
::
target
{};
migraphx
::
compile_options
options
;
options
.
exhaustive_tune
=
true
;
p
.
compile
(
gpu_t
,
options
);
migraphx
::
value
solution_idx
(
0
);
for
(
auto
ins
:
iterator_for
(
*
p
.
get_main_module
()))
{
if
(
ins
->
name
()
==
"gpu::gemm"
)
{
auto
gemm_op
=
migraphx
::
get_operation
(
ins
);
// tuned solution index is not deterministic, but anything other than 0
// (default, invalid, or not available) is good.
// gemm_op.to_value().debug_print();
solution_idx
=
gemm_op
.
to_value
()[
"solution_idx"
];
break
;
}
}
#ifdef ROCBLAS_BETA_FEATURES_API
EXPECT
(
0
!=
solution_idx
.
to
<
std
::
size_t
>
());
#else
EXPECT
(
0
==
solution_idx
.
to
<
std
::
size_t
>
());
#endif
}
// TODO: make tests that run both strided and not-strided GEMMs. Also, try tuning with an invalid
// start value.
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment