gaoqiong / MIGraphX

Commit 23cb7917 (unverified)
Merge branch 'develop' into blas_tuning
Authored by Brian Pickrell on Aug 16, 2023; committed via GitHub on Aug 16, 2023
Parents: b5fcc0bc, ea32ca70
Changes: the merge touches 458 files in total; this page shows 20 of them, with 665 additions and 77 deletions (+665 −77).
src/targets/gpu/include/migraphx/gpu/device/scatter.hpp  (+2 −2)
src/targets/gpu/include/migraphx/gpu/device/topk.hpp  (+13 −13)
src/targets/gpu/include/migraphx/gpu/device_name.hpp  (+5 −3)
src/targets/gpu/include/migraphx/gpu/fuse_ck.hpp  (+48 −0)
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp  (+3 −2)
src/targets/gpu/include/migraphx/gpu/hip.hpp  (+31 −20)
src/targets/gpu/include/migraphx/gpu/kernel.hpp  (+2 −2)
src/targets/gpu/include/migraphx/gpu/lowering.hpp  (+3 −4)
src/targets/gpu/include/migraphx/gpu/miopen.hpp  (+29 −7)
src/targets/gpu/include/migraphx/gpu/mlir.hpp  (+15 −8)
src/targets/gpu/include/migraphx/gpu/pack_args.hpp  (+2 −2)
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp  (+1 −2)
src/targets/gpu/include/migraphx/gpu/rocblas.hpp  (+3 −2)
src/targets/gpu/include/migraphx/gpu/target.hpp  (+2 −2)
src/targets/gpu/include/migraphx/gpu/time_op.hpp  (+1 −3)
src/targets/gpu/include/migraphx/gpu/tuning_config.hpp  (+43 −0)
src/targets/gpu/include/migraphx/gpu/write_literals.hpp  (+1 −1)
src/targets/gpu/jit/ck_gemm.cpp  (+457 −0)
src/targets/gpu/jit/concat.cpp  (+2 −2)
src/targets/gpu/jit/gather.cpp  (+2 −2)
src/targets/gpu/include/migraphx/gpu/device/scatter.hpp (+2 −2)

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument scatter(hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT scatter(hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);

 } // namespace device
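Most of the header changes on this page follow the same pattern as scatter.hpp: a declaration gains a MIGRAPHX_DEVICE_EXPORT or MIGRAPHX_GPU_EXPORT marker, and the include is switched to the gpu config header that provides that marker. The macro definitions themselves are not part of this diff; the sketch below only illustrates what an export macro of this kind usually expands to (a symbol-visibility attribute) and is not the actual MIGraphX definition.

// Illustrative stand-in only; the real MIGRAPHX_GPU_EXPORT / MIGRAPHX_DEVICE_EXPORT
// definitions come from the library's own config headers.
#if defined(_WIN32)
#define MIGRAPHX_EXPORT_SKETCH __declspec(dllexport)
#else
#define MIGRAPHX_EXPORT_SKETCH __attribute__((visibility("default")))
#endif

// When the GPU target is built as a shared library with hidden visibility by default,
// only declarations carrying such a marker remain visible to other libraries:
MIGRAPHX_EXPORT_SKETCH int example_exported_function();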
src/targets/gpu/include/migraphx/gpu/device/topk.hpp (+13 −13)

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,19 +33,19 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument topk_smallest(hipStream_t stream, const argument& val_res, const argument& ind_res, const argument& arg, int64_t k, int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT topk_smallest(hipStream_t stream, const argument& val_res, const argument& ind_res, const argument& arg, int64_t k, int64_t axis);

-argument topk_largest(hipStream_t stream, const argument& val_res, const argument& ind_res, const argument& arg, int64_t k, int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT topk_largest(hipStream_t stream, const argument& val_res, const argument& ind_res, const argument& arg, int64_t k, int64_t axis);

 } // namespace device
 } // namespace gpu
src/targets/gpu/include/migraphx/gpu/device_name.hpp (+5 −3)

@@ -24,16 +24,18 @@
 #ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
 #define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP

-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <string>

+struct hipDeviceProp_t;
+
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-std::string get_device_name();
+MIGRAPHX_GPU_EXPORT std::string get_device_name();

-int get_device_id();
+MIGRAPHX_GPU_EXPORT int get_device_id();

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
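device_name.hpp now forward-declares hipDeviceProp_t rather than pulling in the full HIP runtime header, and exports get_device_name() and get_device_id(). For orientation, the standalone HIP program below (not MIGraphX code; it assumes a ROCm toolchain and is built with hipcc) queries roughly the same information those helpers expose.

// Standalone sketch using the plain HIP runtime API.
#include <hip/hip_runtime_api.h>
#include <iostream>

int main()
{
    int id = 0;
    hipGetDevice(&id); // id of the currently selected device
    hipDeviceProp_t props{};
    hipGetDeviceProperties(&props, id); // gcnArchName holds e.g. "gfx90a:sramecc+:xnack-"
    std::cout << "device " << id << ": " << props.gcnArchName << "\n";
    return 0;
}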
src/targets/gpu/include/migraphx/gpu/fuse_ck.hpp (new file, +48 −0)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_CK_HPP

#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module_pass_manager;

namespace gpu {

struct fuse_ck
{
    context* ctx = nullptr;
    std::string name() const { return "gpu::fuse_ck"; }
    void apply(module_pass_manager& mpm) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp (+3 −2)

@@ -24,7 +24,6 @@
 #ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
 #define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP

 #include <migraphx/config.hpp>
-#include <migraphx/gpu/context.hpp>

 namespace migraphx {
@@ -34,7 +33,9 @@ struct module_pass_manager;
 namespace gpu {

-struct fuse_mlir
+MIGRAPHX_GPU_EXPORT bool mlir_enabled();
+
+struct MIGRAPHX_GPU_EXPORT fuse_mlir
 {
     context* ctx = nullptr;
     std::string name() const { return "gpu::fuse_mlir"; }
src/targets/gpu/include/migraphx/gpu/hip.hpp (+31 −20)

@@ -24,11 +24,12 @@
 #ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
 #define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP

-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/literal.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/functional.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <utility>

 namespace migraphx {
@@ -37,26 +38,26 @@ namespace gpu {
 struct context;

-std::string hip_error(int error);
+MIGRAPHX_GPU_EXPORT std::string hip_error(int error);

-argument allocate_gpu(const shape& s, bool host = false);
+MIGRAPHX_GPU_EXPORT argument allocate_gpu(const shape& s, bool host = false);

-argument register_on_gpu(const argument& arg);
+MIGRAPHX_GPU_EXPORT argument register_on_gpu(const argument& arg);

-argument to_gpu(const argument& arg, bool host = false);
+MIGRAPHX_GPU_EXPORT argument to_gpu(const argument& arg, bool host = false);

-argument from_gpu(const argument& arg);
+MIGRAPHX_GPU_EXPORT argument from_gpu(const argument& arg);

-void set_device(std::size_t id);
+MIGRAPHX_GPU_EXPORT void set_device(std::size_t id);

-void gpu_sync();
-void gpu_sync(const context& ctx);
+MIGRAPHX_GPU_EXPORT void gpu_sync();
+MIGRAPHX_GPU_EXPORT void gpu_sync(const context& ctx);

-void gpu_copy(context& ctx, const argument& src, const argument& dst);
-void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
-void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
+MIGRAPHX_GPU_EXPORT void gpu_copy(context& ctx, const argument& src, const argument& dst);
+MIGRAPHX_GPU_EXPORT void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
+MIGRAPHX_GPU_EXPORT void copy_from_gpu(context& ctx, const argument& src, const argument& dst);

-argument get_preallocation(context& ctx, const std::string& id);
+MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string& id);

 struct hip_allocate
 {
@@ -91,7 +92,7 @@ struct hip_sync_stream
         return inputs.front();
     }

-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
+    argument compute(const context& ctx, const shape&, const std::vector<argument>& args) const
     {
         gpu_sync(ctx);
         if(args.empty())
@@ -112,7 +113,7 @@ struct hip_copy_to_gpu
     std::string name() const { return "hip::copy_to_gpu"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2).same_type();
+        check_shapes{inputs, *this, true}.has(1, 2).same_type();
         return inputs.at(0);
     }

     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
@@ -121,6 +122,10 @@ struct hip_copy_to_gpu
         if(args.size() == 1)
             return input;
         argument result = args[1].share();
+        if(result.get_shape().dynamic())
+        {
+            result = result.reshape(args[0].get_shape());
+        }
         gpu_copy(ctx, input, result);
         // Associate the input since it was registered with hip
         return {result.get_shape(), [input, result]() mutable { return result.data(); }};
@@ -138,19 +143,24 @@ struct hip_copy_from_gpu
     std::string name() const { return "hip::copy_from_gpu"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2).same_type();
+        check_shapes{inputs, *this, true}.has(1, 2).same_type();
         return inputs.at(0);
     }

     argument
-    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
+    compute(context& ctx, const dyn_output& dyn_out, const std::vector<argument>& args) const
     {
         if(args.size() == 1)
         {
-            argument result = allocate_gpu(output_shape, true);
+            argument result = allocate_gpu(dyn_out.computed_shape, true);
             gpu_copy(ctx, args[0], result);
             return result;
         }
-        copy_from_gpu(ctx, args[0], args[1]);
+        argument input = args[0].share();
+        if(input.get_shape().dynamic())
+        {
+            input = input.reshape(args[1].get_shape());
+        }
+        copy_from_gpu(ctx, input, args[1]);
         return args[1];
     }

     std::ptrdiff_t output_alias(const std::vector<shape>& args) const
@@ -177,7 +187,8 @@ struct hip_copy
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
 };

-void store_preallocated_param(context& ctx, const std::string& id, const argument& a);
+MIGRAPHX_GPU_EXPORT void
+store_preallocated_param(context& ctx, const std::string& id, const argument& a);

 struct hip_allocate_memory
 {
src/targets/gpu/include/migraphx/gpu/kernel.hpp (+2 −2)

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
 #define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP

-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/gpu/pack_args.hpp>
 #include <hip/hip_runtime_api.h>
 #include <memory>
@@ -37,7 +37,7 @@ namespace gpu {

 struct kernel_impl;

-struct kernel
+struct MIGRAPHX_GPU_EXPORT kernel
 {
     kernel() = default;
     kernel(const char* image, const std::string& name);
src/targets/gpu/include/migraphx/gpu/lowering.hpp (+3 −4)

@@ -24,13 +24,12 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
 #define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP

 #include <migraphx/config.hpp>
-#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-struct module;
+struct module_pass_manager;

 namespace gpu {
@@ -40,12 +39,12 @@ namespace gpu {
  * * Maps instructions to their GPU-specific counterparts.
  * * Inserts `allocate` instructions before GPU operators.
  */
-struct lowering
+struct MIGRAPHX_GPU_EXPORT lowering
 {
     context* ctx;
     bool offload_copy;
     std::string name() const { return "gpu::lowering"; }
-    void apply(module& m) const;
+    void apply(module_pass_manager& mpm) const;
 };

 } // namespace gpu
src/targets/gpu/include/migraphx/gpu/miopen.hpp (+29 −7)

@@ -75,21 +75,43 @@
 using miopen_problem  = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
 using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);

-inline miopen_solution
-find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false)
+inline miopen_solution find_solution(miopenHandle_t handle,
+                                     size_t num_inputs,
+                                     const miopenTensorArgument_t* tensor_args,
+                                     void* workspace,
+                                     size_t workspace_size,
+                                     miopenProblem_t problem,
+                                     bool tune = false)
 {
     miopenSolution_t solution;
     size_t found = 0;
-    miopen_find_options fo = nullptr;
+    miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
     if(tune)
     {
-        fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
         miopenSetFindOptionTuning(fo.get(), 1);
     }
-    auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+    for(auto i : range(num_inputs))
+    {
+        auto status = miopenSetFindOptionPreallocatedTensor(
+            fo.get(), tensor_args[i].id, tensor_args[i].buffer);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
+    }
+    auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
+#else
+    miopenStatus_t status;
+    (void)(num_inputs);
+    (void)(tensor_args);
+    (void)(workspace_size);
+    (void)(workspace);
+#endif
+    status      = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
     auto result = miopen_solution{solution};
     if(status != miopenStatusSuccess or found == 0)
-        MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
+        MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
     return result;
 }
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
 }

 template <class T>
-inline convolution_descriptor make_deconv(const T& op)
+inline convolution_descriptor make_convolution_backwards(const T& op)
 {
     auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
     miopenConvolutionMode_t c_mode = miopenTranspose;
src/targets/gpu/include/migraphx/gpu/mlir.hpp (+15 −8)

@@ -26,23 +26,30 @@
 #include <string>
 #include <vector>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/gpu/code_object_op.hpp>
 #include <migraphx/instruction_ref.hpp>
+#include <migraphx/gpu/tuning_config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 struct module;

 namespace gpu {

-std::string dump_mlir(const module& m);
+MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);

-code_object_op compile_mlir(const context& ctx, module m, const std::vector<instruction_ref>& inputs);
+MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& migraphx_ctx,
+                                                module m,
+                                                const std::vector<instruction_ref>& inputs,
+                                                const value& solution);

-instruction_ref insert_mlir(module& m, instruction_ref ins, code_object_op co, const std::vector<instruction_ref>& inputs);
+MIGRAPHX_GPU_EXPORT instruction_ref
+insert_mlir(module& m, instruction_ref ins, code_object_op co, const std::vector<instruction_ref>& inputs);

+MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
+                                                         module m,
+                                                         const std::vector<shape>& inputs);

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/pack_args.hpp (+2 −2)

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
 #define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP

-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/requires.hpp>
 #include <utility>
 #include <vector>
@@ -46,7 +46,7 @@ struct kernel_argument
     void* data;
 };

-std::vector<char> pack_args(const std::vector<kernel_argument>& args);
+MIGRAPHX_GPU_EXPORT std::vector<char> pack_args(const std::vector<kernel_argument>& args);

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp (+1 −2)

@@ -25,7 +25,6 @@
 #define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP

 #include <migraphx/program.hpp>
 #include <migraphx/config.hpp>
-#include <migraphx/gpu/context.hpp>

 namespace migraphx {
@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-struct pack_int8_args
+struct MIGRAPHX_GPU_EXPORT pack_int8_args
 {
     std::string name() const { return "gpu::pack_int8_args"; }
     void apply(module& m) const;
src/targets/gpu/include/migraphx/gpu/rocblas.hpp (+3 −2)

@@ -39,9 +39,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
 struct context;

-bool get_compute_fp32_flag();
+MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();

-bool get_int8_x4_format(context& ctx);
+MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx);

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/include/migraphx/gpu/target.hpp (+2 −2)

@@ -26,13 +26,13 @@
 #include <migraphx/program.hpp>
 #include <migraphx/compile_options.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-struct target
+struct MIGRAPHX_GPU_EXPORT target
 {
     std::string name() const;
     std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const;
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp → src/targets/gpu/include/migraphx/gpu/time_op.hpp (+1 −3)

@@ -31,12 +31,10 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-namespace driver {

-std::pair<double, double>
+MIGRAPHX_GPU_EXPORT std::pair<double, double>
 time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);

-} // namespace driver
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/include/migraphx/gpu/tuning_config.hpp (new file, +43 −0)

/* MIT License header, identical to the one in fuse_ck.hpp above */
#ifndef MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP

#include <migraphx/config.hpp>
#include <migraphx/value.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct tuning_config
{
    value problem;
    std::vector<value> solutions;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
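tuning_config is a small carrier type: a problem description plus the list of candidate solutions a tuner should time (ck_gemm.cpp below fills one in get_tuning_config). The real struct stores migraphx::value objects; the standalone stand-in below uses plain standard-library types only to show the intended shape of the data.

// Stand-in with plain std types; the field types here are placeholders, not the real ones.
#include <cstddef>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

struct tuning_config_sketch
{
    std::string problem;                // e.g. the gemm shapes, serialized
    std::vector<std::size_t> solutions; // candidate solution indices to benchmark
};

int main()
{
    tuning_config_sketch tc;
    tc.problem = "gemm: {1, 1024, 1024} x {1, 1024, 4096}";
    tc.solutions.resize(8);
    std::iota(tc.solutions.begin(), tc.solutions.end(), 0); // try solutions 0..7
    std::cout << tc.problem << " -> " << tc.solutions.size() << " candidates\n";
    return 0;
}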
src/targets/gpu/include/migraphx/gpu/write_literals.hpp (+1 −1)

@@ -32,7 +32,7 @@ struct module;
 namespace gpu {

-struct write_literals
+struct MIGRAPHX_GPU_EXPORT write_literals
 {
     context* ctx = nullptr;
     std::string name() const { return "gpu::write_literals"; }
src/targets/gpu/jit/ck_gemm.cpp (new file, +457 −0)

/* MIT License header, identical to the one in fuse_ck.hpp above */
#include <fstream>
#include <migraphx/filesystem.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/env.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/module.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include "ck/host/device_gemm_multiple_d.hpp"

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING_VALUE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TUNE_CK);

// NOLINTNEXTLINE
static const char* const ck_gemm_kernel = R"__migraphx__(
#include <args.hpp>
#include <migraphx/kernels/ck_gemm.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <migraphx/kernels/ops.hpp>
#include <${include}>

namespace migraphx {

${preamble}

extern "C" {

MIGRAPHX_GLOBAL void ${kernel}(${params})
{
    transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
        ck_gemm<${solution}, ${blocks_per_batch}>(xs...);
    });
}

}

} // namespace migraphx

)__migraphx__";

// NOLINTNEXTLINE
static const char* const disable_warning_pragma = R"__migraphx__(
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
${content}
#pragma clang diagnostic pop
)__migraphx__";

template <class P>
static std::string ck_disable_warnings(P p)
{
    return interpolate_string(disable_warning_pragma,
                              {{"content", std::string{p.first, p.second}}});
}
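ck_gemm_kernel and disable_warning_pragma are template strings whose ${...} placeholders get filled by interpolate_string from migraphx/stringutils.hpp before the HIP source is compiled. The stand-in below reimplements only the substitution idea with the standard library; it is not the real helper.

// Minimal placeholder substitution, assuming simple non-overlapping ${key} tokens.
#include <cstddef>
#include <iostream>
#include <map>
#include <string>

static std::string substitute(std::string text, const std::map<std::string, std::string>& vars)
{
    for(const auto& [key, val] : vars)
    {
        const std::string token = "${" + key + "}";
        for(std::size_t pos = text.find(token); pos != std::string::npos; pos = text.find(token))
            text.replace(pos, token.size(), val);
    }
    return text;
}

int main()
{
    const std::string templ = "MIGRAPHX_GLOBAL void ${kernel}(${params})";
    std::cout << substitute(templ,
                            {{"kernel", "ck_gemm_example_kernel"},
                             {"params", "void* private_p0, void* private_p1"}})
              << "\n";
    return 0;
}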
static std::unordered_map<std::string, std::string> create_ck_header_strings()
{
    std::unordered_map<std::string, std::string> result;
    auto ck_headers = ck::host::GetHeaders();
    std::transform(ck_headers.begin(),
                   ck_headers.end(),
                   std::inserter(result, result.begin()),
                   [&](auto&& p) { return std::make_pair(p.first, ck_disable_warnings(p.second)); });
    return result;
}

static std::vector<src_file> create_ck_headers()
{
    static const auto& header_strings = create_ck_header_strings();
    std::vector<src_file> srcs;
    std::transform(header_strings.begin(),
                   header_strings.end(),
                   std::back_inserter(srcs),
                   [&](auto&& p) {
                       return src_file{fs::path{p.first},
                                       {p.second.data(), p.second.data() + p.second.size()}};
                   });
    return srcs;
}

static const std::vector<src_file>& ck_headers()
{
    static const auto& headers = create_ck_headers();
    return headers;
}

static bool transposed_matrix(const shape& s) { return s.strides().back() != 1; }

using tuning_entry = std::pair<std::vector<shape>, size_t>;

static std::vector<tuning_entry> read_tuning(const std::string& s)
{
    if(not fs::exists(s))
        return {};
    return from_value<std::vector<tuning_entry>>(from_json_string(read_string(s)));
}

static float matrix_distance(const shape& x, const shape& y)
{
    if(x.type() != y.type())
        return std::numeric_limits<float>::max();
    if(transposed_matrix(x) != transposed_matrix(y))
        return std::numeric_limits<float>::max();
    auto sum_squared = std::inner_product(x.lens().rbegin(),
                                          x.lens().rbegin() + 2,
                                          y.lens().rbegin(),
                                          0,
                                          std::plus<>{},
                                          [](auto a, auto b) { return (a - b) * (a - b); });
    return std::sqrt(sum_squared);
}

static std::size_t get_tuning_for(const std::vector<shape>& inputs)
{
    static auto tuning = read_tuning(string_value_of(MIGRAPHX_CK_TUNING{}, ""));
    if(tuning.empty())
    {
        std::cout << "*********** Warning: No CK tuning! for config:" << std::endl;
        std::cout << "  " << inputs[0] << std::endl;
        std::cout << "  " << inputs[1] << std::endl;
        std::cout << "  " << inputs[2] << std::endl;
    }
    auto it = std::find_if(
        tuning.begin(), tuning.end(), [&](const auto& p) { return p.first == inputs; });
    if(it == tuning.end())
    {
        std::cout << "*********** Warning: CK tuning missing for config!" << std::endl;
        std::cout << "  " << inputs[0] << std::endl;
        std::cout << "  " << inputs[1] << std::endl;
        std::cout << "  " << inputs[2] << std::endl;
        std::vector<std::pair<float, std::size_t>> w;
        std::transform(tuning.begin(), tuning.end(), std::back_inserter(w), [&](const auto& p) {
            if(inputs.size() < 3 or p.first.size() < 3)
                MIGRAPHX_THROW("Invalid CK config");
            auto avg_distance = std::inner_product(
                p.first.begin(),
                p.first.begin() + 3,
                inputs.begin(),
                0.0f,
                std::plus<>{},
                [](const auto& x, const auto& y) { return matrix_distance(x, y) / 3.0f; });
            return std::make_pair(avg_distance, p.second);
        });
        std::sort(w.begin(), w.end());
        std::size_t default_value = 4;
        if(not w.empty())
            default_value = w.front().second;
        auto tuning_val = value_of(MIGRAPHX_CK_TUNING_VALUE{}, default_value);
        std::cout << "*********** Warning: CK try tuning: " << tuning_val << std::endl;
        return tuning_val;
    }
    return it->second;
}
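get_tuning_for falls back to a nearest-neighbour lookup when the exact input shapes are not present in the tuning file: each tuned entry is scored with matrix_distance, the Euclidean distance over the trailing two dimensions, and the closest entry's solution index becomes the default. The self-contained snippet below illustrates that heuristic with plain dimension vectors; it is an illustration of the idea, not MIGraphX code.

// Pick the tuned configuration whose last two dimensions are closest to the query.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

static float distance2d(const std::vector<long>& x, const std::vector<long>& y)
{
    // Compare only the last two dimensions, as matrix_distance does.
    float sum = 0.0f;
    for(int i = 1; i <= 2; i++)
    {
        const auto a = x[x.size() - i];
        const auto b = y[y.size() - i];
        sum += float(a - b) * float(a - b);
    }
    return std::sqrt(sum);
}

int main()
{
    const std::vector<std::vector<long>> tuned = {{1, 1024, 1024}, {1, 4096, 4096}};
    const std::vector<long> query              = {1, 1000, 1024};
    const auto it = std::min_element(tuned.begin(), tuned.end(), [&](const auto& a, const auto& b) {
        return distance2d(a, query) < distance2d(b, query);
    });
    std::cout << "closest tuned config index: " << (it - tuned.begin()) << "\n";
    return 0;
}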
struct ck_gemm_compiler : compiler<ck_gemm_compiler>
{
    static std::string get_layout(const shape& s)
    {
        return transposed_matrix(s) ? "ck::tensor_layout::gemm::ColumnMajor"
                                    : "ck::tensor_layout::gemm::RowMajor";
    }

    static ck::host::DataType get_type(const shape& s)
    {
        if(s.type() == shape::half_type)
            return ck::host::DataType::Half;
        else if(s.type() == shape::float_type)
            return ck::host::DataType::Float;
        else if(s.type() == shape::int8_type)
            return ck::host::DataType::Int8;
        else if(s.type() == shape::int32_type)
            return ck::host::DataType::Int32;
        MIGRAPHX_THROW("Unsupported ck type");
    }

    template <class Iterator, class F>
    static std::string ck_tuple(Iterator start, Iterator last, F f)
    {
        std::vector<std::string> s;
        std::transform(start, last, std::back_inserter(s), f);
        return "ck::Tuple<" + join_strings(s, ",") + ">";
    }

    static std::vector<shape> adjust_inputs(std::vector<shape> inputs, bool& swap_inputs)
    {
        swap_inputs  = false;
        auto c_shape = inputs.back();
        if(not transposed_matrix(c_shape))
            return inputs;
        std::vector<int64_t> perm(c_shape.lens().size());
        std::iota(perm.begin(), perm.end(), 0);
        std::swap(perm[perm.size() - 1], perm[perm.size() - 2]);
        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](shape s) {
            return reorder_shape(s, perm);
        });
        swap_inputs = true;
        return inputs;
    }

    static std::size_t get_batch_count(const shape& s)
    {
        return std::accumulate(s.lens().rbegin() + 2,
                               s.lens().rend(),
                               std::size_t{1},
                               std::multiplies<std::size_t>());
    }

    static void fold_batch_dims(shape& s)
    {
        auto lens = s.lens();
        if(lens.size() <= 2)
            return;
        auto batch_count = get_batch_count(s);
        auto m1          = lens.at(lens.size() - 2);
        auto m2          = lens.at(lens.size() - 1);
        if(transposed_matrix(s))
            s = shape{s.type(), {m1, m2 * batch_count}};
        else
            s = shape{s.type(), {m1 * batch_count, m2}};
    }

    static void remove_batch_dims(shape& s)
    {
        auto lens = s.lens();
        if(lens.size() <= 2)
            return;
        auto m1 = lens.at(lens.size() - 2);
        auto m2 = lens.at(lens.size() - 1);
        s       = shape{s.type(), {m1, m2}};
    }

    std::vector<std::string> names() const { return {"ck_gemm", "gpu::ck_gemm"}; }

    static bool standard_batch(const shape& s)
    {
        if(s.lens().size() < 3)
            return true;
        std::vector<std::size_t> lens(s.lens().begin(), s.lens().end() - 2);
        std::vector<std::size_t> strides(s.strides().begin(), s.strides().end() - 2);
        auto base = *(s.lens().end() - 2) * *(s.lens().end() - 1);
        std::transform(strides.begin(), strides.end(), strides.begin(), [&](auto stride) {
            return stride / base;
        });
        return shape{s.type(), lens, strides}.standard();
    }

    bool can_fold_batch(const std::vector<shape>& inputs) const
    {
        const auto& b_shape = inputs[1];
        if(std::any_of(inputs.begin() + 2, inputs.end() - 1, [](auto input) {
               return not standard_batch(input);
           }))
            return false;
        const auto& b_strides = b_shape.strides();
        return std::all_of(
            b_strides.begin(), b_strides.end() - 2, [](auto stride) { return stride == 0; });
    }

    ck::host::device_gemm_multiple_d::Problem create_problem(const std::vector<shape>& inputs,
                                                             const value& v) const
    {
        const auto& a_shape = inputs[0];
        const auto& b_shape = inputs[1];
        const auto& c_shape = inputs.back();

        // cppcheck-suppress unreadVariable
        auto rank        = a_shape.ndim();
        auto batch_count = get_batch_count(c_shape);
        auto m           = c_shape.lens()[rank - 2];
        m                = can_fold_batch(inputs) ? m * batch_count : m;
        auto n           = c_shape.lens().back();
        auto k           = a_shape.lens().back();

        const bool trans_a = transposed_matrix(a_shape);
        const bool trans_b = transposed_matrix(b_shape);
        const bool trans_e = transposed_matrix(c_shape);

        const auto a_type = get_type(a_shape);
        const auto b_type = get_type(b_shape);
        const auto e_type = get_type(c_shape);

        std::vector<bool> ds_layout;
        std::transform(inputs.begin() + 2,
                       inputs.end() - 1,
                       std::back_inserter(ds_layout),
                       [](const auto& i) { return transposed_matrix(i); });

        std::vector<ck::host::DataType> ds_type;
        std::transform(inputs.begin() + 2,
                       inputs.end() - 1,
                       std::back_inserter(ds_type),
                       [](const auto& i) { return get_type(i); });

        std::string ck_passthrough = "ck_passthrough";
        std::string cde_op         = ck_passthrough;
        assert(inputs.size() < 4 or v.contains("post"));
        if(v.contains("post"))
        {
            cde_op = v.at("post").to<std::string>();
        }

        return ck::host::device_gemm_multiple_d::Problem{m,
                                                         n,
                                                         k,
                                                         trans_a,
                                                         trans_b,
                                                         trans_e,
                                                         ds_layout,
                                                         a_type,
                                                         b_type,
                                                         e_type,
                                                         ds_type,
                                                         ck_passthrough,
                                                         ck_passthrough,
                                                         cde_op};
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        const auto& a_shape = inputs[0];
        const auto& b_shape = inputs[1];
        const auto& c_shape = inputs.back();
        auto tuning_value   = v.get("tuning_value", 4);
        if(not v.contains("tuning_value"))
            tuning_value = get_tuning_for({a_shape, b_shape, c_shape});
        auto batch_count = get_batch_count(c_shape);
        auto problem     = create_problem(inputs, v);

        const auto include_header   = problem.GetIncludeHeader();
        const auto solutions        = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
        const auto& solution        = solutions.at(tuning_value);
        const auto template_str     = solution.template_str;
        const auto blocks_per_batch = solution.grid_size;
        const auto block_size       = solution.block_size;

        hip_compile_options options;
        options.additional_src_files = ck_headers();
        auto grid_size = can_fold_batch(inputs) ? blocks_per_batch : batch_count * blocks_per_batch;
        options.set_launch_params(v, grid_size * block_size, block_size);
        options.inputs         = inputs;
        options.output         = c_shape;
        options.kernel_name    = v.get("kernel", "ck_gemm_kernel");
        options.virtual_inputs = inputs;
        if(can_fold_batch(inputs))
        {
            auto vinputs = inputs;
            fold_batch_dims(vinputs[0]);
            remove_batch_dims(vinputs[1]);
            std::for_each(vinputs.begin() + 2, vinputs.end(), fold_batch_dims);
            options.virtual_inputs = vinputs;
        }
        if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{}))
            options.params += " -DMIGRAPHX_CK_CHECK=1";

        auto src = interpolate_string(ck_gemm_kernel,
                                      {{"solution", template_str},
                                       {"include", include_header},
                                       {"params", enum_params(inputs.size(), "void * private_p")},
                                       {"args", enum_params(inputs.size(), "private_p")},
                                       {"blocks_per_batch", to_string(blocks_per_batch)},
                                       {"preamble", v.get("preamble", std::string{})},
                                       {"kernel", options.kernel_name}});

        return compile_hip_code_object(src, options);
    }

    value create_settings(instruction_ref ins, const operation& op) const
    {
        auto v      = op.to_value();
        v["kernel"] = "ck_gemm_kernel";
        if(not ins->module_inputs().empty())
        {
            auto* pm      = ins->module_inputs().front();
            v["preamble"] = generate_pointwise(*pm, "post_ck_gemm_function") +
                            "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm, post_ck_gemm_function);";
            v["post"]   = "ck_function_adaptor<post_ck_gemm>";
            v["kernel"] = "ck_gemm_" + generate_name_from_ops(*pm) + "_kernel";
        }
        return v;
    }

    compiler_replace
    compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) const
    {
        auto shapes = to_shapes(ins->inputs());
        auto v      = create_settings(ins, op);
        if(not solution.is_null())
            v["tuning_value"] = solution;
        return {compile_op(ctx, shapes, v),
                [=](module& m, instruction_ref ins2, const operation& code_object) {
                    if(enabled(MIGRAPHX_LOG_CK_GEMM{}))
                    {
                        std::vector<shape> gemm_shapes{
                            shapes[0], shapes[1], shapes.back().with_type(shapes[0].type())};
                        std::cout << "gpu::ck_gemm: " << to_json_string(to_value(gemm_shapes))
                                  << std::endl;
                    }
                    m.replace_instruction(ins2, code_object, ins2->inputs());
                }};
    }

    optional<tuning_config>
    get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const
    {
        if(not exhaustive and not enabled(MIGRAPHX_TUNE_CK{}))
            return nullopt;
        tuning_config tc;
        auto shapes    = to_shapes(ins->inputs());
        auto problem   = create_problem(shapes, create_settings(ins, op));
        auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
        tc.solutions.resize(solutions.size());
        std::iota(tc.solutions.begin(), tc.solutions.end(), 0);
        std::vector<shape> gemm_shapes{shapes[0], shapes[1], shapes.back()};
        tc.problem = to_value(gemm_shapes);
        return tc;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/jit/concat.cpp (+2 −2)

@@ -47,7 +47,7 @@ ${preamble}

 extern "C" {

-__global__ void ${kernel}(${params})
+MIGRAPHX_GLOBAL void ${kernel}(${params})
 {
     transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
         concat<${axis}>(${concat_args})(${post}, y, xs...);
@@ -108,7 +108,7 @@ struct concat_compiler : compiler<concat_compiler>
             v["post"]   = "MIGRAPHX_LIFT(post_concat)";
             v["kernel"] = "concat_" + generate_name_from_ops(*pm) + "_kernel";
         }
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
+        return compile_op(ctx, to_shapes(ins->inputs()), v);
     }
 };
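Both JIT sources replace the hard-coded __global__ qualifier with MIGRAPHX_GLOBAL in their embedded kernel templates, matching the ck_gemm kernel above. The macro itself is defined in the device-side kernel headers rather than in this diff; a plausible stand-in, shown only to make the intent concrete, is a thin wrapper over the HIP kernel qualifier.

// Assumed fallback definition for illustration; the real macro comes from the
// migraphx kernel headers and may differ.
#ifndef MIGRAPHX_GLOBAL
#define MIGRAPHX_GLOBAL __global__
#endif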
src/targets/gpu/jit/gather.cpp (+2 −2)

@@ -44,7 +44,7 @@ namespace migraphx {

 extern "C" {

-__global__ void gather_kernel(void* in_data, void* in_indices, void* output)
+MIGRAPHX_GLOBAL void gather_kernel(void* in_data, void* in_indices, void* output)
 {
     make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
         gather<${axis}>(xs...);
@@ -80,7 +80,7 @@ struct gather_compiler : compiler<gather_compiler>
     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+        return compile_op(ctx, to_shapes(ins->inputs()), op.to_value());
     }
 };