gaoqiong / MIGraphX — Commit f12064ee

Merge branch 'develop' into resnet50_partition

Authored Aug 25, 2023 by umangyadav
Parents: 2c4f70be, 6f1c947f

Changes: 126 files in the full commit; this page shows 20 changed files with 132 additions and 82 deletions (+132 -82).
Changed files on this page:

src/targets/cpu/include/migraphx/cpu/dnnl.hpp                      +5   -1
src/targets/cpu/target.cpp                                         +1   -1
src/targets/gpu/CMakeLists.txt                                     +1   -1
src/targets/gpu/compile_gen.cpp                                    +1   -1
src/targets/gpu/compile_hip_code_object.cpp                        +1   -1
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp      +7   -1
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp        +2   -2
src/targets/gpu/device/topk.cpp                                    +2   -2
src/targets/gpu/device_name.cpp                                    +1   -15
src/targets/gpu/fuse_mlir.cpp                                      +35  -30
src/targets/gpu/include/migraphx/gpu/compiler.hpp                  +1   -6
src/targets/gpu/include/migraphx/gpu/context.hpp                   +2   -8
src/targets/gpu/include/migraphx/gpu/device_name.hpp               +0   -2
src/targets/gpu/include/migraphx/gpu/hip.hpp                       +1   -1
src/targets/gpu/include/migraphx/gpu/mlir.hpp                      +8   -3
src/targets/gpu/include/migraphx/gpu/tuning_config.hpp (new file)  +43  -0
src/targets/gpu/jit/ck_gemm.cpp                                    +2   -1
src/targets/gpu/jit/mlir.cpp                                       +15  -2
src/targets/gpu/jit/pointwise.cpp                                  +1   -1
src/targets/gpu/jit/reduce.cpp                                     +3   -3
src/targets/cpu/include/migraphx/cpu/dnnl.hpp

@@ -400,7 +400,11 @@ struct dnnl_extend_op : dnnl_op<Derived, Primitive>
     }
     // dnnl has some issues with non-packed inputs
-    void required(const check_shapes& cs) const { cs.packed_or_broadcasted(); }
+    template <class T>
+    void required(const check_shapes<T>& cs) const
+    {
+        cs.packed_or_broadcasted();
+    }
     std::string name() const { return "dnnl::" + op.name(); }
     shape compute_shape(std::vector<shape> inputs) const
...
src/targets/cpu/target.cpp

@@ -61,7 +61,7 @@ namespace cpu {
 std::string target::name() const { return "cpu"; }
 
-// cppcheck-suppress constParameter
+// cppcheck-suppress constParameterReference
 std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const
 {
     auto& ctx = any_cast<context>(gctx);
...
src/targets/gpu/CMakeLists.txt

@@ -48,7 +48,7 @@ include(Embed)
 file(GLOB KERNEL_FILES CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
 message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
-add_embed_library(migraphx_kernels ${KERNEL_FILES})
+add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/)
 file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
 add_library(migraphx_device ${DEVICE_GPU_SRCS})
...
src/targets/gpu/compile_gen.cpp

@@ -331,7 +331,7 @@ static std::vector<std::string> get_op_names(const module& m)
     {
         if(starts_with(ins.name(), "@"))
             continue;
-        if(ins.name() == "multibroadcast")
+        if(contains({"multibroadcast", "contiguous"}, ins.name()))
             continue;
         if(ins.name() == "pointwise")
         {
...
src/targets/gpu/compile_hip_code_object.cpp

@@ -167,7 +167,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
                    [](auto&& p) {
                        auto&& name = p.first;
                        auto&& c    = p.second;
-                       auto path   = fs::path{"migraphx"} / "kernels" / name;
+                       auto path   = name;
                        return src_file{path, c};
                    });
     srcs.push_back(src_file{fs::path{"main.cpp"},
...
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp

@@ -41,7 +41,7 @@ struct index
     __device__ index_int nglobal() const { return blockDim.x * gridDim.x; } // NOLINT
     __device__ index_int nlocal() const { return blockDim.x; } // NOLINT
     template <class F>
     __device__ void global_stride(index_int n, F f) const
...
@@ -81,6 +81,12 @@ inline auto launch(hipStream_t stream, index_int global, index_int local)
         dim3 nthreads(local);
         // cppcheck-suppress UseDeviceLaunch
         hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, stream, f);
+        hipError_t kernel_launch_status = hipGetLastError();
+        if(kernel_launch_status != hipSuccess)
+        {
+            MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " +
+                           std::string(hipGetErrorString(kernel_launch_status)));
+        }
     };
 }
...
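The added hunk checks hipGetLastError() immediately after hipLaunchKernelGGL, because a failed launch (for example, an oversized block) otherwise surfaces only at the next synchronizing call. Below is a minimal standalone sketch of the same pattern, assuming only a HIP toolchain; the kernel and sizes are made up for illustration and are not MIGraphX code:

// Standalone HIP sketch: provoke a launch failure and read it back right away.
#include <hip/hip_runtime.h>
#include <cstdio>

__global__ void noop() {}

int main()
{
    // 4096 threads per block exceeds the usual hardware limit, so the launch fails.
    hipLaunchKernelGGL(noop, dim3(1), dim3(4096), 0, nullptr);
    hipError_t status = hipGetLastError();
    if(status != hipSuccess)
        std::printf("kernel failed to launch: %s\n", hipGetErrorString(status));
    return 0;
}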
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp

@@ -124,7 +124,7 @@ void nary_broadcast_vec_impl(
             buffer[i] = binput.data()[i];
         }
         __syncthreads();
-        auto* bp = as_pointer(buffer);
+        const auto* bp = as_pointer(buffer);
         // Process the data
         for(size_t i = idx.global; i < nelements; i += nglobal)
         {
...
@@ -219,7 +219,7 @@ void nary_double_broadcast_vec_impl(
             buffer[i + bdim_vec_len] = binput2.data()[i];
         }
         __syncthreads();
-        auto* bp = as_pointer(buffer);
+        const auto* bp = as_pointer(buffer);
         // Process the data
         for(size_t i = idx.global; i < nelements; i += nglobal)
         {
...
src/targets/gpu/device/topk.cpp

@@ -72,12 +72,12 @@ struct hip_heap_vector
         index_int l = 2 * index + 1;
         index_int r = 2 * index + 2;
-        if(l < n && compare(data[data_index(l)], data[data_index(index)]))
+        if(l < n and compare(data[data_index(l)], data[data_index(index)]))
         {
             index = l;
         }
-        if(r < n && compare(data[data_index(r)], data[data_index(index)]))
+        if(r < n and compare(data[data_index(r)], data[data_index(index)]))
         {
             index = r;
             if(compare(data[data_index(l)], data[data_index(r)]))
...
src/targets/gpu/device_name.cpp

@@ -31,20 +31,6 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-template <class HipDeviceProp>
-std::string get_arch_name(rank<0>, const HipDeviceProp& props)
-{
-    return "gfx" + std::to_string(props.gcnArch);
-}
-
-template <class HipDeviceProp>
-auto get_arch_name(rank<1>, const HipDeviceProp& props) -> decltype(std::string(props.gcnArchName))
-{
-    return std::string(props.gcnArchName);
-}
-
-std::string get_arch_name(const hipDeviceProp_t& props) { return get_arch_name(rank<1>{}, props); }
-
 int get_device_id()
 {
     int device;
...
@@ -60,7 +46,7 @@ std::string get_device_name()
     auto status = hipGetDeviceProperties(&props, get_device_id());
     if(status != hipSuccess)
         MIGRAPHX_THROW("Failed to get device properties");
-    return get_arch_name(props);
+    return props.gcnArchName;
 }
 
 } // namespace gpu
...
src/targets/gpu/fuse_mlir.cpp

@@ -86,7 +86,7 @@ struct mlir_op
         size_t param_cnt               = 0;
         std::vector<std::string> names = mod->get_parameter_names();
         std::sort(names.begin(), names.end());
-        for(std::string param_name : names)
+        for(const std::string& param_name : names)
         {
             ins_shapes[mod->get_parameter(param_name)] = inputs[param_cnt++];
         }
...
@@ -210,42 +210,47 @@ struct find_mlir_op
             return false;
         }
         const std::initializer_list<std::string> any_type_ops = {"@literal", "@param", "@return"};
-        const std::initializer_list<std::string> no_bool_ops = {"convolution",
-                                                                "quant_convolution",
-                                                                "dot",
-                                                                "quant_dot",
-                                                                "add",
-                                                                "clip",
-                                                                "relu",
-                                                                "sub",
-                                                                "mul",
-                                                                "div",
-                                                                "pow",
-                                                                "where",
-                                                                "quantizelinear",
-                                                                "dequantizelinear",
-                                                                "abs",
-                                                                "neg"};
-        const std::initializer_list<std::string> fp_only_ops = {"ceil",
-                                                                "erf",
-                                                                "exp",
-                                                                "floor",
-                                                                "log",
-                                                                "recip",
-                                                                "rsqrt",
-                                                                "sigmoid"
-                                                                "softmax",
-                                                                "tanh"};
+        const std::initializer_list<std::string> no_bool_ops = {
+            "convolution",
+            "quant_convolution",
+            "dot",
+            "quant_dot",
+            "add",
+            "clip",
+            "relu",
+            "sub",
+            "mul",
+            "div",
+            "pow",
+            "where",
+            "quantizelinear",
+            "dequantizelinear",
+            "abs",
+            "neg",
+        };
+        const std::initializer_list<std::string> fp_only_ops = {
+            "ceil",
+            "erf",
+            "exp",
+            "floor",
+            "log",
+            "recip",
+            "rsqrt",
+            // There are bugs in MLIR right now for models using sigmoid so disable it for now
+            // "sigmoid",
+            "softmax",
+            "tanh",
+        };
         bool is_float = contains({type_t::float_type, type_t::half_type}, result_type);
         if(contains(any_type_ops, name))
             return true;
-        if(result_type != type_t::bool_type && contains(no_bool_ops, name))
+        if(result_type != type_t::bool_type and contains(no_bool_ops, name))
             return true;
-        if(is_float && contains(fp_only_ops, name))
+        if(is_float and contains(fp_only_ops, name))
             return true;
         // Only conversions between floating types are known to be unambigiously
         // supported.
-        if(is_float && name == "convert")
+        if(is_float and name == "convert")
         {
             return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) {
                 return contains({type_t::float_type, type_t::half_type},
                                 arg->get_shape().type());
...
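Worth noting in the fp_only_ops change: the removed list was missing a comma after "sigmoid", so the two adjacent string literals were concatenated at compile time and the list silently held "sigmoidsoftmax" rather than two separate entries. A small self-contained illustration of that pitfall (the list contents here are made up, not the MIGraphX source):

// Adjacent string literals are concatenated during translation, so the missing
// comma produces one element, not two.
#include <initializer_list>
#include <iostream>
#include <string>

int main()
{
    const std::initializer_list<std::string> ops = {"rsqrt",
                                                    "sigmoid" // <- missing comma
                                                    "softmax",
                                                    "tanh"};
    for(const auto& op : ops)
        std::cout << op << '\n'; // prints: rsqrt, sigmoidsoftmax, tanh
    return 0;
}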
src/targets/gpu/include/migraphx/gpu/compiler.hpp

@@ -32,6 +32,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/optional.hpp>
 #include <migraphx/rank.hpp>
+#include <migraphx/gpu/tuning_config.hpp>
 #include <functional>
 
 namespace migraphx {
...
@@ -68,12 +69,6 @@ struct compiler_replace
     }
 };
 
-struct tuning_config
-{
-    value problem;
-    std::vector<value> solutions;
-};
-
 using compiler_compile =
     std::function<compiler_replace(context&, instruction_ref, operation, const value&)>;
 using compiler_compile_op =
...
src/targets/gpu/include/migraphx/gpu/context.hpp

@@ -46,13 +46,7 @@ using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);
 struct hip_device
 {
-    hip_device()
-    {
-        device_props.gcnArchName[0]      = '\0';
-        device_props.gcnArch             = 0;
-        device_props.multiProcessorCount = 0;
-        add_stream();
-    }
+    hip_device() : device_props{} { add_stream(); }
 
     hip_device(std::size_t id, std::size_t n) : device_id(id)
     {
...
@@ -171,7 +165,7 @@ struct hip_device
     std::size_t stream_id() const { return current_stream; }
 
-    std::string get_device_name() const { return get_arch_name(device_props); }
+    std::string get_device_name() const { return device_props.gcnArchName; }
 
     std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); }
...
src/targets/gpu/include/migraphx/gpu/device_name.hpp

@@ -33,8 +33,6 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-MIGRAPHX_GPU_EXPORT std::string get_arch_name(const hipDeviceProp_t& props);
-
 MIGRAPHX_GPU_EXPORT std::string get_device_name();
 
 MIGRAPHX_GPU_EXPORT int get_device_id();
...
src/targets/gpu/include/migraphx/gpu/hip.hpp

@@ -92,7 +92,7 @@ struct hip_sync_stream
         return inputs.front();
     }
 
-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
+    argument compute(const context& ctx, const shape&, const std::vector<argument>& args) const
     {
         gpu_sync(ctx);
         if(args.empty())
...
src/targets/gpu/include/migraphx/gpu/mlir.hpp

@@ -29,6 +29,7 @@
 #include <migraphx/gpu/config.hpp>
 #include <migraphx/gpu/code_object_op.hpp>
 #include <migraphx/instruction_ref.hpp>
+#include <migraphx/gpu/tuning_config.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
...
@@ -36,16 +37,20 @@ struct module;
 namespace gpu {
 
 MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);
-MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& migraphx_ctx,
+MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& ctx,
                                                 module m,
-                                                const std::vector<instruction_ref>& inputs);
+                                                const std::vector<instruction_ref>& inputs,
+                                                const value& solution);
 
 MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
                                                 instruction_ref ins,
                                                 code_object_op co,
                                                 const std::vector<instruction_ref>& inputs);
 
+MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
+                                                         module m,
+                                                         const std::vector<shape>& inputs);
+
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
src/targets/gpu/include/migraphx/gpu/tuning_config.hpp (new file, mode 100644)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct tuning_config
{
    value problem;
    std::vector<value> solutions;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
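The new tuning_config pairs a problem description with a list of candidate solutions. As a rough sketch of how such a pair is typically consumed by an exhaustive tuner, here is a standalone example; every name in it is a hypothetical stand-in (plain strings instead of migraphx::value), not the MIGraphX tuning API:

#include <functional>
#include <limits>
#include <string>
#include <vector>

struct toy_tuning_config // simplified stand-in for gpu::tuning_config
{
    std::string problem;                // key describing the kernel being tuned
    std::vector<std::string> solutions; // candidate configurations to try
};

// Benchmark every candidate and return the fastest one; a real tuner would cache
// the winner against tc.problem and hand it back to compile() as the solution.
std::string pick_best(const toy_tuning_config& tc,
                      const std::function<double(const std::string&)>& time_solution)
{
    std::string best;
    double best_time = std::numeric_limits<double>::max();
    for(const auto& s : tc.solutions)
    {
        double t = time_solution(s);
        if(t < best_time)
        {
            best_time = t;
            best      = s;
        }
    }
    return best;
}

int main()
{
    toy_tuning_config tc{"gemm 64x64x64", {"tile16", "tile32", "tile64"}};
    auto fake_timer = [](const std::string& s) { return static_cast<double>(s.size()); };
    return pick_best(tc, fake_timer) == "tile16" ? 0 : 1;
}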
src/targets/gpu/jit/ck_gemm.cpp

@@ -300,7 +300,8 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
         const auto& b_shape = inputs[1];
         const auto& c_shape = inputs.back();
 
-        auto rank        = a_shape.lens().size();
+        // cppcheck-suppress unreadVariable
+        auto rank        = a_shape.ndim();
         auto batch_count = get_batch_count(c_shape);
         auto m           = c_shape.lens()[rank - 2];
...
src/targets/gpu/jit/mlir.cpp

@@ -36,11 +36,12 @@ struct mlir_compiler : compiler<mlir_compiler>
     operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }
 
-    compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
+    compiler_replace
+    compile(const context& ctx, instruction_ref ins, const operation&, const value& solution) const
     {
         auto* smod = ins->module_inputs().front();
         assert(smod->get_parameter_names().size() == ins->inputs().size() - 1);
-        return insert(compile_mlir(ctx, *smod, ins->inputs()));
+        return insert(compile_mlir(ctx, *smod, ins->inputs(), solution));
     }
 
     compiler_replace insert(code_object_op co) const
...
@@ -50,6 +51,18 @@ struct mlir_compiler : compiler<mlir_compiler>
             m.replace_instruction(ins, mlir);
         }};
     }
 
+    optional<tuning_config> get_tuning_config(const context& ctx,
+                                              instruction_ref ins,
+                                              const operation&,
+                                              bool exhaustive) const
+    {
+        if(not exhaustive)
+            return nullopt;
+        auto shapes = to_shapes(ins->inputs());
+        auto* smod  = ins->module_inputs().front();
+        return get_tuning_config_mlir(ctx, *smod, shapes);
+    }
 };
 
 } // namespace gpu
...
src/targets/gpu/jit/pointwise.cpp

@@ -72,7 +72,7 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         hip_compile_options options;
         options.inputs         = inputs;
         options.output         = inputs.back();
-        options.virtual_inputs = reduce_dims(inputs);
+        options.virtual_inputs = reduce_dims(normalize_permutation(inputs));
         options.params         = "-Wno-float-equal";
         auto axis = find_fast_axis(options.virtual_inputs);
         auto vec  = vectorize::elements(ctx, axis, options.virtual_inputs);
...
src/targets/gpu/jit/reduce.cpp

@@ -84,7 +84,7 @@ static shape get_reduced_shape(const shape& s, const std::vector<T>& axes)
     std::fill(lens.begin(), lens.end(), 1);
     for(const auto& axis : axes)
         lens[axis] = s.lens()[axis];
-    return shape{s.type(), lens};
+    return s.with_lens(lens);
 }
 
 template <class T>
...
@@ -93,7 +93,7 @@ static shape get_output_shape(const shape& s, const std::vector<T>& axes)
     auto lens = s.lens();
     for(const auto& axis : axes)
         lens[axis] = 1;
-    return shape{s.type(), lens};
+    return s.with_lens(lens);
 }
 
 template <class ReduceLens>
...
@@ -228,7 +228,7 @@ struct fused_reduce_compiler : compiler<fused_reduce_compiler>
         auto virtual_inputs = inputs;
         virtual_inputs.push_back(get_reduced_shape(inputs.front(), axes));
         virtual_inputs.push_back(get_output_shape(inputs.front(), axes));
-        virtual_inputs = reduce_dims(virtual_inputs);
+        virtual_inputs = reduce_dims(normalize_permutation(virtual_inputs));
         auto reduce_output_shape = virtual_inputs.back();
         virtual_inputs.pop_back();
         auto reduction_shape = virtual_inputs.back();
...