MIGraphX — Commit 23cb7917 (unverified)

Merge branch 'develop' into blas_tuning

Authored by Brian Pickrell on Aug 16, 2023; committed by GitHub on Aug 16, 2023.
Parents: b5fcc0bc, ea32ca70
Changes: 458 files in the commit overall; this page shows 20 changed files, with 665 additions and 77 deletions (+665 -77).
src/targets/gpu/include/migraphx/gpu/device/scatter.hpp    +2    -2
src/targets/gpu/include/migraphx/gpu/device/topk.hpp       +13   -13
src/targets/gpu/include/migraphx/gpu/device_name.hpp       +5    -3
src/targets/gpu/include/migraphx/gpu/fuse_ck.hpp           +48   -0
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp         +3    -2
src/targets/gpu/include/migraphx/gpu/hip.hpp               +31   -20
src/targets/gpu/include/migraphx/gpu/kernel.hpp            +2    -2
src/targets/gpu/include/migraphx/gpu/lowering.hpp          +3    -4
src/targets/gpu/include/migraphx/gpu/miopen.hpp            +29   -7
src/targets/gpu/include/migraphx/gpu/mlir.hpp              +15   -8
src/targets/gpu/include/migraphx/gpu/pack_args.hpp         +2    -2
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp    +1    -2
src/targets/gpu/include/migraphx/gpu/rocblas.hpp           +3    -2
src/targets/gpu/include/migraphx/gpu/target.hpp            +2    -2
src/targets/gpu/include/migraphx/gpu/time_op.hpp           +1    -3
src/targets/gpu/include/migraphx/gpu/tuning_config.hpp     +43   -0
src/targets/gpu/include/migraphx/gpu/write_literals.hpp    +1    -1
src/targets/gpu/jit/ck_gemm.cpp                            +457  -0
src/targets/gpu/jit/concat.cpp                             +2    -2
src/targets/gpu/jit/gather.cpp                             +2    -2
src/targets/gpu/include/migraphx/gpu/device/scatter.hpp

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP
 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>
 namespace migraphx {
@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument scatter(
+argument MIGRAPHX_DEVICE_EXPORT scatter(
     hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);
 } // namespace device
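Most of the header changes on this page follow one pattern: free functions and pass structs in the gpu target gain MIGRAPHX_GPU_EXPORT or MIGRAPHX_DEVICE_EXPORT annotations, and the generic <migraphx/config.hpp> include is swapped for a target-local config header that provides them. The macros themselves are not shown in this diff; export macros of this kind are normally generated by the build and expand to a symbol-visibility attribute, roughly as in the hypothetical sketch below (an assumption, not the actual generated header).

// Hypothetical sketch of an export macro; with hidden visibility as the build
// default, only annotated symbols remain visible outside the shared library.
#if defined(_WIN32)
#    define MIGRAPHX_GPU_EXPORT __declspec(dllexport)
#else
#    define MIGRAPHX_GPU_EXPORT __attribute__((visibility("default")))
#endif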
src/targets/gpu/include/migraphx/gpu/device/topk.hpp

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>
 namespace migraphx {
@@ -33,19 +33,19 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument topk_smallest(hipStream_t stream,
-                       const argument& val_res,
-                       const argument& ind_res,
-                       const argument& arg,
-                       int64_t k,
-                       int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT topk_smallest(hipStream_t stream,
+                                              const argument& val_res,
+                                              const argument& ind_res,
+                                              const argument& arg,
+                                              int64_t k,
+                                              int64_t axis);
-argument topk_largest(hipStream_t stream,
-                      const argument& val_res,
-                      const argument& ind_res,
-                      const argument& arg,
-                      int64_t k,
-                      int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT topk_largest(hipStream_t stream,
+                                             const argument& val_res,
+                                             const argument& ind_res,
+                                             const argument& arg,
+                                             int64_t k,
+                                             int64_t axis);
 } // namespace device
 } // namespace gpu
src/targets/gpu/include/migraphx/gpu/device_name.hpp

@@ -24,16 +24,18 @@
 #ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
 #define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <string>
+struct hipDeviceProp_t;
+
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-std::string get_device_name();
+MIGRAPHX_GPU_EXPORT std::string get_device_name();
-int get_device_id();
+MIGRAPHX_GPU_EXPORT int get_device_id();
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/fuse_ck.hpp  (new file, mode 100644)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_CK_HPP

#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module_pass_manager;

namespace gpu {

struct fuse_ck
{
    context* ctx = nullptr;
    std::string name() const { return "gpu::fuse_ck"; }
    void apply(module_pass_manager& mpm) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
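fuse_ck is a compiler pass object: name() identifies it and apply() rewrites the modules handed to it through the module_pass_manager. Below is a hedged sketch of how such a pass would typically be handed to the pass pipeline; the helper function and the include path for pass are illustrative assumptions, and only fuse_ck and context come from this commit.

// Illustrative only: build a pass list containing the new fusion pass.
#include <migraphx/pass.hpp>            // assumed include path for migraphx::pass
#include <migraphx/gpu/fuse_ck.hpp>

std::vector<migraphx::pass> example_ck_passes(migraphx::gpu::context& ctx)
{
    return {migraphx::gpu::fuse_ck{&ctx}};
}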
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp

@@ -24,7 +24,6 @@
 #ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
 #define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
-#include <migraphx/config.hpp>
 #include <migraphx/gpu/context.hpp>
 namespace migraphx {
@@ -34,7 +33,9 @@ struct module_pass_manager;
 namespace gpu {
-struct fuse_mlir
+MIGRAPHX_GPU_EXPORT bool mlir_enabled();
+
+struct MIGRAPHX_GPU_EXPORT fuse_mlir
 {
     context* ctx = nullptr;
     std::string name() const { return "gpu::fuse_mlir"; }
src/targets/gpu/include/migraphx/gpu/hip.hpp

@@ -24,11 +24,12 @@
 #ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
 #define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/literal.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/functional.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <utility>
 namespace migraphx {
@@ -37,26 +38,26 @@ namespace gpu {
 struct context;
-std::string hip_error(int error);
+MIGRAPHX_GPU_EXPORT std::string hip_error(int error);
-argument allocate_gpu(const shape& s, bool host = false);
+MIGRAPHX_GPU_EXPORT argument allocate_gpu(const shape& s, bool host = false);
-argument register_on_gpu(const argument& arg);
+MIGRAPHX_GPU_EXPORT argument register_on_gpu(const argument& arg);
-argument to_gpu(const argument& arg, bool host = false);
+MIGRAPHX_GPU_EXPORT argument to_gpu(const argument& arg, bool host = false);
-argument from_gpu(const argument& arg);
+MIGRAPHX_GPU_EXPORT argument from_gpu(const argument& arg);
-void set_device(std::size_t id);
+MIGRAPHX_GPU_EXPORT void set_device(std::size_t id);
-void gpu_sync();
+MIGRAPHX_GPU_EXPORT void gpu_sync();
-void gpu_sync(const context& ctx);
+MIGRAPHX_GPU_EXPORT void gpu_sync(const context& ctx);
-void gpu_copy(context& ctx, const argument& src, const argument& dst);
+MIGRAPHX_GPU_EXPORT void gpu_copy(context& ctx, const argument& src, const argument& dst);
-void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
+MIGRAPHX_GPU_EXPORT void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
-void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
+MIGRAPHX_GPU_EXPORT void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
-argument get_preallocation(context& ctx, const std::string& id);
+MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string& id);
 struct hip_allocate
 {
@@ -91,7 +92,7 @@ struct hip_sync_stream
         return inputs.front();
     }
-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
+    argument compute(const context& ctx, const shape&, const std::vector<argument>& args) const
     {
         gpu_sync(ctx);
         if(args.empty())
@@ -112,7 +113,7 @@ struct hip_copy_to_gpu
     std::string name() const { return "hip::copy_to_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2).same_type();
+        check_shapes{inputs, *this, true}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
@@ -121,6 +122,10 @@ struct hip_copy_to_gpu
         if(args.size() == 1)
             return input;
         argument result = args[1].share();
+        if(result.get_shape().dynamic())
+        {
+            result = result.reshape(args[0].get_shape());
+        }
         gpu_copy(ctx, input, result);
         // Associate the input since it was registered with hip
         return {result.get_shape(), [input, result]() mutable { return result.data(); }};
@@ -138,19 +143,24 @@ struct hip_copy_from_gpu
     std::string name() const { return "hip::copy_from_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2).same_type();
+        check_shapes{inputs, *this, true}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument
-    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
+    compute(context& ctx, const dyn_output& dyn_out, const std::vector<argument>& args) const
     {
         if(args.size() == 1)
         {
-            argument result = allocate_gpu(output_shape, true);
+            argument result = allocate_gpu(dyn_out.computed_shape, true);
             gpu_copy(ctx, args[0], result);
             return result;
         }
-        copy_from_gpu(ctx, args[0], args[1]);
+        argument input = args[0].share();
+        if(input.get_shape().dynamic())
+        {
+            input = input.reshape(args[1].get_shape());
+        }
+        copy_from_gpu(ctx, input, args[1]);
         return args[1];
     }
     std::ptrdiff_t output_alias(const std::vector<shape>& args) const
@@ -177,7 +187,8 @@ struct hip_copy
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
 };
-void store_preallocated_param(context& ctx, const std::string& id, const argument& a);
+MIGRAPHX_GPU_EXPORT void
+store_preallocated_param(context& ctx, const std::string& id, const argument& a);
 struct hip_allocate_memory
 {
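The new dynamic-shape branches above all follow the same pattern: take a non-owning view of the preallocated buffer with share(), and, if that buffer was declared with a dynamic shape, reshape the view to the concrete shape known at run time before copying. A condensed, commented restatement of the copy_from_gpu hunk (the types are the MIGraphX ones declared in this header; no behaviour is added beyond what the diff shows):

argument input = args[0].share();               // alias the source buffer; no data copy
if(input.get_shape().dynamic())                 // buffer was declared with dynamic dims
    input = input.reshape(args[1].get_shape()); // view it with the concrete output shape
copy_from_gpu(ctx, input, args[1]);             // now both sides have fixed shapes
return args[1];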
src/targets/gpu/include/migraphx/gpu/kernel.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
 #define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/gpu/pack_args.hpp>
 #include <hip/hip_runtime_api.h>
 #include <memory>
@@ -37,7 +37,7 @@ namespace gpu {
 struct kernel_impl;
-struct kernel
+struct MIGRAPHX_GPU_EXPORT kernel
 {
     kernel() = default;
     kernel(const char* image, const std::string& name);
src/targets/gpu/include/migraphx/gpu/lowering.hpp

@@ -24,13 +24,12 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
 #define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
-#include <migraphx/config.hpp>
 #include <migraphx/gpu/context.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-struct module;
+struct module_pass_manager;
 namespace gpu {
@@ -40,12 +39,12 @@ namespace gpu {
  * * Maps instructions to their GPU-specific counterparts.
  * * Inserts `allocate` instructions before GPU operators.
  */
-struct lowering
+struct MIGRAPHX_GPU_EXPORT lowering
 {
     context* ctx;
     bool offload_copy;
     std::string name() const { return "gpu::lowering"; }
-    void apply(module& m) const;
+    void apply(module_pass_manager& mpm) const;
 };
 } // namespace gpu
src/targets/gpu/include/migraphx/gpu/miopen.hpp

@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
 using miopen_problem  = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
 using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
 inline miopen_solution find_solution(miopenHandle_t handle,
-                                     miopenProblem_t problem,
-                                     bool tune = false)
+                                     size_t num_inputs,
+                                     const miopenTensorArgument_t* tensor_args,
+                                     void* workspace,
+                                     size_t workspace_size,
+                                     miopenProblem_t problem,
+                                     bool tune = false)
 {
     miopenSolution_t solution;
     size_t found           = 0;
-    miopen_find_options fo = nullptr;
+    miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
     if(tune)
     {
-        fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
         miopenSetFindOptionTuning(fo.get(), 1);
     }
-    auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+    for(auto i : range(num_inputs))
+    {
+        auto status = miopenSetFindOptionPreallocatedTensor(
+            fo.get(), tensor_args[i].id, tensor_args[i].buffer);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
+    }
+    auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
+#else
+    miopenStatus_t status;
+    (void)(num_inputs);
+    (void)(tensor_args);
+    (void)(workspace_size);
+    (void)(workspace);
+#endif
+    status      = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
     auto result = miopen_solution{solution};
     if(status != miopenStatusSuccess or found == 0)
-        MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
+        MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
     return result;
 }
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
 }
 template <class T>
-inline convolution_descriptor make_deconv(const T& op)
+inline convolution_descriptor make_convolution_backwards(const T& op)
 {
     auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
     miopenConvolutionMode_t c_mode = miopenTranspose;
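A hedged usage sketch for the widened find_solution() above, assuming the enclosing migraphx::gpu namespace used by the other headers on this page. The handle, problem and tensor-argument setup are assumed to exist elsewhere and are not part of this header; the workspace pointer and size are only consumed when MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS is defined, otherwise they are cast to void as the #else branch shows.

std::vector<miopenTensorArgument_t> tensor_args; // assumed: filled from the op's arguments
void* workspace            = nullptr;            // assumed: preallocated workspace buffer
std::size_t workspace_size = 0;

auto solution = migraphx::gpu::find_solution(
    handle, tensor_args.size(), tensor_args.data(), workspace, workspace_size, problem, false);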
src/targets/gpu/include/migraphx/gpu/mlir.hpp

@@ -26,23 +26,30 @@
 #include <string>
 #include <vector>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/gpu/code_object_op.hpp>
 #include <migraphx/instruction_ref.hpp>
+#include <migraphx/gpu/tuning_config.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 struct module;
 namespace gpu {
-std::string dump_mlir(const module& m);
+MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);
-code_object_op compile_mlir(const context& migraphx_ctx,
-                            module m,
-                            const std::vector<instruction_ref>& inputs);
+MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& ctx,
+                                                module m,
+                                                const std::vector<instruction_ref>& inputs,
+                                                const value& solution);
-instruction_ref insert_mlir(module& m,
+MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
                             instruction_ref ins,
                             code_object_op co,
                             const std::vector<instruction_ref>& inputs);
+MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
+                                                         module m,
+                                                         const std::vector<shape>& inputs);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
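The three exported functions suggest a tune-then-compile flow: get_tuning_config_mlir enumerates candidate solutions for a module, one candidate is selected (the benchmarking loop is not part of this header), and the chosen value is passed back to compile_mlir via its new solution parameter. A hedged sketch follows; ctx, m, input_shapes and input_refs are assumed caller-side variables, and passing a null value for an untuned compile is an assumption rather than something this header states.

auto tc = migraphx::gpu::get_tuning_config_mlir(ctx, m, input_shapes);
migraphx::value chosen{};          // null value => no explicit solution (assumed convention)
if(not tc.solutions.empty())
    chosen = tc.solutions.front(); // real code would benchmark the candidates first
auto co = migraphx::gpu::compile_mlir(ctx, m, input_refs, chosen);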
src/targets/gpu/include/migraphx/gpu/pack_args.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
 #define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/requires.hpp>
 #include <utility>
 #include <vector>
@@ -46,7 +46,7 @@ struct kernel_argument
     void* data;
 };
-std::vector<char> pack_args(const std::vector<kernel_argument>& args);
+MIGRAPHX_GPU_EXPORT std::vector<char> pack_args(const std::vector<kernel_argument>& args);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp

@@ -25,7 +25,6 @@
 #define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
 #include <migraphx/program.hpp>
-#include <migraphx/config.hpp>
 #include <migraphx/gpu/context.hpp>
 namespace migraphx {
@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-struct pack_int8_args
+struct MIGRAPHX_GPU_EXPORT pack_int8_args
 {
     std::string name() const { return "gpu::pack_int8_args"; }
     void apply(module& m) const;
src/targets/gpu/include/migraphx/gpu/rocblas.hpp

@@ -39,9 +39,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
 struct context;
-bool get_compute_fp32_flag();
+MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();
-bool get_int8_x4_format(context& ctx);
+MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/include/migraphx/gpu/target.hpp

@@ -26,13 +26,13 @@
 #include <migraphx/program.hpp>
 #include <migraphx/compile_options.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-struct target
+struct MIGRAPHX_GPU_EXPORT target
 {
     std::string name() const;
     std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const;
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp → src/targets/gpu/include/migraphx/gpu/time_op.hpp

@@ -31,12 +31,10 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-namespace driver {
-std::pair<double, double>
+MIGRAPHX_GPU_EXPORT std::pair<double, double>
 time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
-} // namespace driver
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/include/migraphx/gpu/tuning_config.hpp  (new file, mode 100644)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 * [standard MIT license text, identical to the header in fuse_ck.hpp above]
 */
#ifndef MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP

#include <migraphx/config.hpp>
#include <migraphx/value.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct tuning_config
{
    value problem;
    std::vector<value> solutions;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
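A minimal sketch of how this struct gets filled, mirroring what the ck_gemm compiler does later on this page: problem records the GEMM's input/output shapes as a value, and solutions enumerates the indices of the candidate kernels to benchmark. gemm_shapes and num_solutions are assumed to exist in the caller.

#include <numeric>                                      // std::iota

migraphx::gpu::tuning_config tc;
tc.problem = migraphx::to_value(gemm_shapes);           // assumed: {A, B, C} shapes
tc.solutions.resize(num_solutions);                     // assumed: size of the CK solution list
std::iota(tc.solutions.begin(), tc.solutions.end(), 0); // one index per candidate kernel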
src/targets/gpu/include/migraphx/gpu/write_literals.hpp

@@ -32,7 +32,7 @@ struct module;
 namespace gpu {
-struct write_literals
+struct MIGRAPHX_GPU_EXPORT write_literals
 {
     context* ctx = nullptr;
     std::string name() const { return "gpu::write_literals"; }
src/targets/gpu/jit/ck_gemm.cpp  (new file, mode 100644)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 * [standard MIT license text, identical to the header in fuse_ck.hpp above]
 */
#include <fstream>
#include <migraphx/filesystem.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/env.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/module.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include "ck/host/device_gemm_multiple_d.hpp"

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING_VALUE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TUNE_CK);

// NOLINTNEXTLINE
static const char* const ck_gemm_kernel = R"__migraphx__(
#include <args.hpp>
#include <migraphx/kernels/ck_gemm.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <migraphx/kernels/ops.hpp>
#include <${include}>

namespace migraphx {

${preamble}

extern "C" {

MIGRAPHX_GLOBAL void ${kernel}(${params})
{
    transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
        ck_gemm<${solution}, ${blocks_per_batch}>(xs...);
    });
}

}

} // namespace migraphx

)__migraphx__";

// NOLINTNEXTLINE
static const char* const disable_warning_pragma = R"__migraphx__(
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
${content}
#pragma clang diagnostic pop
)__migraphx__";

template <class P>
static std::string ck_disable_warnings(P p)
{
    return interpolate_string(disable_warning_pragma,
                              {{"content", std::string{p.first, p.second}}});
}

static std::unordered_map<std::string, std::string> create_ck_header_strings()
{
    std::unordered_map<std::string, std::string> result;
    auto ck_headers = ck::host::GetHeaders();

    std::transform(
        ck_headers.begin(), ck_headers.end(), std::inserter(result, result.begin()), [&](auto&& p) {
            return std::make_pair(p.first, ck_disable_warnings(p.second));
        });
    return result;
}

static std::vector<src_file> create_ck_headers()
{
    static const auto& header_strings = create_ck_header_strings();
    std::vector<src_file> srcs;
    std::transform(
        header_strings.begin(), header_strings.end(), std::back_inserter(srcs), [&](auto&& p) {
            return src_file{fs::path{p.first},
                            {p.second.data(), p.second.data() + p.second.size()}};
        });
    return srcs;
}

static const std::vector<src_file>& ck_headers()
{
    static const auto& headers = create_ck_headers();
    return headers;
}

static bool transposed_matrix(const shape& s) { return s.strides().back() != 1; }

using tuning_entry = std::pair<std::vector<shape>, size_t>;
static std::vector<tuning_entry> read_tuning(const std::string& s)
{
    if(not fs::exists(s))
        return {};
    return from_value<std::vector<tuning_entry>>(from_json_string(read_string(s)));
}

static float matrix_distance(const shape& x, const shape& y)
{
    if(x.type() != y.type())
        return std::numeric_limits<float>::max();
    if(transposed_matrix(x) != transposed_matrix(y))
        return std::numeric_limits<float>::max();
    auto sum_squared = std::inner_product(x.lens().rbegin(),
                                          x.lens().rbegin() + 2,
                                          y.lens().rbegin(),
                                          0,
                                          std::plus<>{},
                                          [](auto a, auto b) { return (a - b) * (a - b); });
    return std::sqrt(sum_squared);
}

static std::size_t get_tuning_for(const std::vector<shape>& inputs)
{
    static auto tuning = read_tuning(string_value_of(MIGRAPHX_CK_TUNING{}, ""));
    if(tuning.empty())
    {
        std::cout << "*********** Warning: No CK tuning! for config:" << std::endl;
        std::cout << "  " << inputs[0] << std::endl;
        std::cout << "  " << inputs[1] << std::endl;
        std::cout << "  " << inputs[2] << std::endl;
    }
    auto it = std::find_if(
        tuning.begin(), tuning.end(), [&](const auto& p) { return p.first == inputs; });
    if(it == tuning.end())
    {
        std::cout << "*********** Warning: CK tuning missing for config!" << std::endl;
        std::cout << "  " << inputs[0] << std::endl;
        std::cout << "  " << inputs[1] << std::endl;
        std::cout << "  " << inputs[2] << std::endl;
        std::vector<std::pair<float, std::size_t>> w;
        std::transform(tuning.begin(), tuning.end(), std::back_inserter(w), [&](const auto& p) {
            if(inputs.size() < 3 or p.first.size() < 3)
                MIGRAPHX_THROW("Invalid CK config");
            auto avg_distance = std::inner_product(
                p.first.begin(),
                p.first.begin() + 3,
                inputs.begin(),
                0.0f,
                std::plus<>{},
                [](const auto& x, const auto& y) { return matrix_distance(x, y) / 3.0f; });
            return std::make_pair(avg_distance, p.second);
        });
        std::sort(w.begin(), w.end());
        std::size_t default_value = 4;
        if(not w.empty())
            default_value = w.front().second;
        auto tuning_val = value_of(MIGRAPHX_CK_TUNING_VALUE{}, default_value);
        std::cout << "*********** Warning: CK try tuning: " << tuning_val << std::endl;
        return tuning_val;
    }
    return it->second;
}

struct ck_gemm_compiler : compiler<ck_gemm_compiler>
{
    static std::string get_layout(const shape& s)
    {
        return transposed_matrix(s) ? "ck::tensor_layout::gemm::ColumnMajor"
                                    : "ck::tensor_layout::gemm::RowMajor";
    }

    static ck::host::DataType get_type(const shape& s)
    {
        if(s.type() == shape::half_type)
            return ck::host::DataType::Half;
        else if(s.type() == shape::float_type)
            return ck::host::DataType::Float;
        else if(s.type() == shape::int8_type)
            return ck::host::DataType::Int8;
        else if(s.type() == shape::int32_type)
            return ck::host::DataType::Int32;
        MIGRAPHX_THROW("Unsupported ck type");
    }

    template <class Iterator, class F>
    static std::string ck_tuple(Iterator start, Iterator last, F f)
    {
        std::vector<std::string> s;
        std::transform(start, last, std::back_inserter(s), f);
        return "ck::Tuple<" + join_strings(s, ",") + ">";
    }

    static std::vector<shape> adjust_inputs(std::vector<shape> inputs, bool& swap_inputs)
    {
        swap_inputs  = false;
        auto c_shape = inputs.back();
        if(not transposed_matrix(c_shape))
            return inputs;
        std::vector<int64_t> perm(c_shape.lens().size());
        std::iota(perm.begin(), perm.end(), 0);
        std::swap(perm[perm.size() - 1], perm[perm.size() - 2]);
        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](shape s) {
            return reorder_shape(s, perm);
        });
        swap_inputs = true;
        return inputs;
    }

    static std::size_t get_batch_count(const shape& s)
    {
        return std::accumulate(
            s.lens().rbegin() + 2, s.lens().rend(), std::size_t{1}, std::multiplies<std::size_t>());
    }

    static void fold_batch_dims(shape& s)
    {
        auto lens = s.lens();
        if(lens.size() <= 2)
            return;
        auto batch_count = get_batch_count(s);
        auto m1          = lens.at(lens.size() - 2);
        auto m2          = lens.at(lens.size() - 1);
        if(transposed_matrix(s))
            s = shape{s.type(), {m1, m2 * batch_count}};
        else
            s = shape{s.type(), {m1 * batch_count, m2}};
    }

    static void remove_batch_dims(shape& s)
    {
        auto lens = s.lens();
        if(lens.size() <= 2)
            return;
        auto m1 = lens.at(lens.size() - 2);
        auto m2 = lens.at(lens.size() - 1);
        s       = shape{s.type(), {m1, m2}};
    }

    std::vector<std::string> names() const { return {"ck_gemm", "gpu::ck_gemm"}; }

    static bool standard_batch(const shape& s)
    {
        if(s.lens().size() < 3)
            return true;
        std::vector<std::size_t> lens(s.lens().begin(), s.lens().end() - 2);
        std::vector<std::size_t> strides(s.strides().begin(), s.strides().end() - 2);
        auto base = *(s.lens().end() - 2) * *(s.lens().end() - 1);
        std::transform(strides.begin(), strides.end(), strides.begin(), [&](auto stride) {
            return stride / base;
        });
        return shape{s.type(), lens, strides}.standard();
    }

    bool can_fold_batch(const std::vector<shape>& inputs) const
    {
        const auto& b_shape = inputs[1];
        if(std::any_of(inputs.begin() + 2, inputs.end() - 1, [](auto input) {
               return not standard_batch(input);
           }))
            return false;
        const auto& b_strides = b_shape.strides();
        return std::all_of(
            b_strides.begin(), b_strides.end() - 2, [](auto stride) { return stride == 0; });
    }

    ck::host::device_gemm_multiple_d::Problem create_problem(const std::vector<shape>& inputs,
                                                             const value& v) const
    {
        const auto& a_shape = inputs[0];
        const auto& b_shape = inputs[1];
        const auto& c_shape = inputs.back();

        // cppcheck-suppress unreadVariable
        auto rank = a_shape.ndim();

        auto batch_count = get_batch_count(c_shape);
        auto m           = c_shape.lens()[rank - 2];
        m                = can_fold_batch(inputs) ? m * batch_count : m;
        auto n           = c_shape.lens().back();
        auto k           = a_shape.lens().back();

        const bool trans_a = transposed_matrix(a_shape);
        const bool trans_b = transposed_matrix(b_shape);
        const bool trans_e = transposed_matrix(c_shape);
        const auto a_type  = get_type(a_shape);
        const auto b_type  = get_type(b_shape);
        const auto e_type  = get_type(c_shape);
        std::vector<bool> ds_layout;
        std::transform(inputs.begin() + 2, inputs.end() - 1, std::back_inserter(ds_layout),
                       [](const auto& i) { return transposed_matrix(i); });
        std::vector<ck::host::DataType> ds_type;
        std::transform(inputs.begin() + 2, inputs.end() - 1, std::back_inserter(ds_type),
                       [](const auto& i) { return get_type(i); });

        std::string ck_passthrough = "ck_passthrough";
        std::string cde_op         = ck_passthrough;
        assert(inputs.size() < 4 or v.contains("post"));
        if(v.contains("post"))
        {
            cde_op = v.at("post").to<std::string>();
        }

        return ck::host::device_gemm_multiple_d::Problem{
            m,       n,      k,      trans_a, trans_b,        trans_e,        ds_layout,
            a_type,  b_type, e_type, ds_type, ck_passthrough, ck_passthrough, cde_op};
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        const auto& a_shape = inputs[0];
        const auto& b_shape = inputs[1];
        const auto& c_shape = inputs.back();
        auto tuning_value   = v.get("tuning_value", 4);
        if(not v.contains("tuning_value"))
            tuning_value = get_tuning_for({a_shape, b_shape, c_shape});
        auto batch_count = get_batch_count(c_shape);
        auto problem     = create_problem(inputs, v);

        const auto include_header   = problem.GetIncludeHeader();
        const auto solutions        = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
        const auto& solution        = solutions.at(tuning_value);
        const auto template_str     = solution.template_str;
        const auto blocks_per_batch = solution.grid_size;
        const auto block_size       = solution.block_size;

        hip_compile_options options;
        options.additional_src_files = ck_headers();
        auto grid_size = can_fold_batch(inputs) ? blocks_per_batch : batch_count * blocks_per_batch;
        options.set_launch_params(v, grid_size * block_size, block_size);
        options.inputs         = inputs;
        options.output         = c_shape;
        options.kernel_name    = v.get("kernel", "ck_gemm_kernel");
        options.virtual_inputs = inputs;
        if(can_fold_batch(inputs))
        {
            auto vinputs = inputs;
            fold_batch_dims(vinputs[0]);
            remove_batch_dims(vinputs[1]);
            std::for_each(vinputs.begin() + 2, vinputs.end(), fold_batch_dims);
            options.virtual_inputs = vinputs;
        }

        if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{}))
            options.params += " -DMIGRAPHX_CK_CHECK=1";

        auto src = interpolate_string(ck_gemm_kernel,
                                      {{"solution", template_str},
                                       {"include", include_header},
                                       {"params", enum_params(inputs.size(), "void * private_p")},
                                       {"args", enum_params(inputs.size(), "private_p")},
                                       {"blocks_per_batch", to_string(blocks_per_batch)},
                                       {"preamble", v.get("preamble", std::string{})},
                                       {"kernel", options.kernel_name}});

        return compile_hip_code_object(src, options);
    }

    value create_settings(instruction_ref ins, const operation& op) const
    {
        auto v      = op.to_value();
        v["kernel"] = "ck_gemm_kernel";
        if(not ins->module_inputs().empty())
        {
            auto* pm      = ins->module_inputs().front();
            v["preamble"] = generate_pointwise(*pm, "post_ck_gemm_function") +
                            "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm, post_ck_gemm_function);";
            v["post"]   = "ck_function_adaptor<post_ck_gemm>";
            v["kernel"] = "ck_gemm_" + generate_name_from_ops(*pm) + "_kernel";
        }
        return v;
    }

    compiler_replace
    compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) const
    {
        auto shapes = to_shapes(ins->inputs());
        auto v      = create_settings(ins, op);
        if(not solution.is_null())
            v["tuning_value"] = solution;
        return {compile_op(ctx, shapes, v),
                [=](module& m, instruction_ref ins2, const operation& code_object) {
                    if(enabled(MIGRAPHX_LOG_CK_GEMM{}))
                    {
                        std::vector<shape> gemm_shapes{
                            shapes[0], shapes[1], shapes.back().with_type(shapes[0].type())};
                        std::cout << "gpu::ck_gemm: " << to_json_string(to_value(gemm_shapes))
                                  << std::endl;
                    }
                    m.replace_instruction(ins2, code_object, ins2->inputs());
                }};
    }

    optional<tuning_config>
    get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const
    {
        if(not exhaustive and not enabled(MIGRAPHX_TUNE_CK{}))
            return nullopt;
        tuning_config tc;
        auto shapes    = to_shapes(ins->inputs());
        auto problem   = create_problem(shapes, create_settings(ins, op));
        auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
        tc.solutions.resize(solutions.size());
        std::iota(tc.solutions.begin(), tc.solutions.end(), 0);
        std::vector<shape> gemm_shapes{shapes[0], shapes[1], shapes.back()};
        tc.problem = to_value(gemm_shapes);
        return tc;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
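When the tuning database has no exact entry for a GEMM, get_tuning_for falls back to the nearest recorded problem: matrix_distance is the Euclidean distance over the last two dimensions of each shape (and effectively infinite when the element types or transposed layouts differ), and the three per-matrix distances for {A, B, C} are each divided by 3 and summed. As an illustrative example with made-up sizes (not taken from this diff): comparing a 1024x768 input against a recorded 1000x768 matrix of the same type and layout gives sqrt((1024-1000)^2 + (768-768)^2) = 24, so an entry whose A, B and C matrices are all that close contributes an average distance of about 24. The entry with the smallest average supplies the default solution index, unless MIGRAPHX_CK_TUNING_VALUE overrides it.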
src/targets/gpu/jit/concat.cpp

@@ -47,7 +47,7 @@ ${preamble}
 extern "C" {
-__global__ void ${kernel}(${params})
+MIGRAPHX_GLOBAL void ${kernel}(${params})
 {
     transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
         concat<${axis}>(${concat_args})(${post}, y, xs...);
@@ -108,7 +108,7 @@ struct concat_compiler : compiler<concat_compiler>
             v["post"]   = "MIGRAPHX_LIFT(post_concat)";
             v["kernel"] = "concat_" + generate_name_from_ops(*pm) + "_kernel";
         }
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
+        return compile_op(ctx, to_shapes(ins->inputs()), v);
     }
 };
src/targets/gpu/jit/gather.cpp

@@ -44,7 +44,7 @@ namespace migraphx {
 extern "C" {
-__global__ void gather_kernel(void* in_data, void* in_indices, void* output)
+MIGRAPHX_GLOBAL void gather_kernel(void* in_data, void* in_indices, void* output)
 {
     make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
         gather<${axis}>(xs...);
@@ -80,7 +80,7 @@ struct gather_compiler : compiler<gather_compiler>
     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+        return compile_op(ctx, to_shapes(ins->inputs()), op.to_value());
     }
 };
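Both JIT sources above swap the raw __global__ qualifier in their embedded kernel templates for a MIGRAPHX_GLOBAL macro. Its real definition lives in the migraphx kernel headers and is not shown in this diff; a macro of this kind usually just wraps the kernel-entry qualifier so extra attributes can be attached in one place, along the lines of the hypothetical sketch below.

// Hypothetical sketch only — not the actual MIGraphX definition.
#ifndef MIGRAPHX_GLOBAL
#    define MIGRAPHX_GLOBAL __global__
#endif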