Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
b9d37172
Commit
b9d37172
authored
Oct 10, 2023
by
Khalique Ahmed
Browse files
manual merge
parents
1af66a1c
ea62d7aa
Changes
337
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
483 additions
and
147 deletions
+483
-147
src/py/include/migraphx/py.hpp
src/py/include/migraphx/py.hpp
+2
-1
src/py/py_loader.cpp
src/py/py_loader.cpp
+1
-1
src/rewrite_pooling.cpp
src/rewrite_pooling.cpp
+5
-16
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+5
-7
src/shape.cpp
src/shape.cpp
+23
-16
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+124
-39
src/simplify_dyn_ops.cpp
src/simplify_dyn_ops.cpp
+141
-0
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+48
-2
src/split_single_dyn_dim.cpp
src/split_single_dyn_dim.cpp
+0
-32
src/sqlite.cpp
src/sqlite.cpp
+1
-0
src/targets/cpu/gemm.cpp
src/targets/cpu/gemm.cpp
+5
-1
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
+5
-1
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+1
-1
src/targets/cpu/target.cpp
src/targets/cpu/target.cpp
+1
-1
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+8
-1
src/targets/gpu/compile_gen.cpp
src/targets/gpu/compile_gen.cpp
+1
-1
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+55
-4
src/targets/gpu/compile_hip_code_object.cpp
src/targets/gpu/compile_hip_code_object.cpp
+31
-20
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
...targets/gpu/device/include/migraphx/gpu/device/launch.hpp
+24
-1
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+2
-2
No files found.
src/py/include/migraphx/py.hpp
View file @
b9d37172
...
...
@@ -26,11 +26,12 @@
#include <migraphx/config.hpp>
#include <migraphx/program.hpp>
#include <migraphx/py/export.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
program
load_py
(
const
std
::
string
&
filename
);
MIGRAPHX_PY_EXPORT
program
load_py
(
const
std
::
string
&
filename
);
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
...
...
src/py/py_loader.cpp
View file @
b9d37172
...
...
@@ -64,7 +64,7 @@ static dynamic_loader py_lib()
return
lib
;
}
program
load_py
(
const
std
::
string
&
filename
)
MIGRAPHX_PY_EXPORT
program
load_py
(
const
std
::
string
&
filename
)
{
static
auto
f
=
py_lib
().
get_function
<
program
(
const
std
::
string
&
)
>
(
"migraphx_load_py"
);
return
f
(
filename
);
...
...
src/rewrite_pooling.cpp
View file @
b9d37172
...
...
@@ -43,9 +43,7 @@ void rewrite_pooling::apply(module& m) const
continue
;
if
(
ins
->
inputs
().
empty
())
continue
;
auto
&&
s
=
ins
->
inputs
().
front
()
->
get_shape
();
if
(
not
s
.
standard
())
continue
;
auto
&&
s
=
ins
->
inputs
().
front
()
->
get_shape
();
auto
&&
op
=
any_cast
<
op
::
pooling
>
(
ins
->
get_operator
());
if
(
not
std
::
all_of
(
op
.
padding
.
begin
(),
op
.
padding
.
end
(),
[](
auto
i
)
{
return
i
==
0
;
}))
continue
;
...
...
@@ -54,27 +52,18 @@ void rewrite_pooling::apply(module& m) const
auto
lens
=
s
.
lens
();
if
(
not
std
::
equal
(
lens
.
begin
()
+
2
,
lens
.
end
(),
op
.
lengths
.
begin
(),
op
.
lengths
.
end
()))
continue
;
std
::
int64_t
n
=
s
.
lens
()[
0
];
std
::
int64_t
c
=
s
.
lens
()[
1
];
auto
reshape
=
m
.
insert_instruction
(
ins
,
make_op
(
"reshape"
,
{{
"dims"
,
{
n
*
c
,
-
1
}}}),
ins
->
inputs
().
front
());
instruction_ref
pooling
{};
std
::
vector
<
std
::
int64_t
>
axes
(
lens
.
size
()
-
2
);
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
2
);
// average pooling
if
(
op
.
mode
==
op
::
pooling_mode
::
average
)
{
pooling
=
m
.
insert
_instruction
(
ins
,
make_op
(
"reduce_mean"
,
{{
"axes"
,
{
1
}
}}),
reshape
);
m
.
replace
_instruction
(
ins
,
make_op
(
"reduce_mean"
,
{{
"axes"
,
axes
}}),
ins
->
inputs
()
);
}
// max pooling
else
{
pooling
=
m
.
insert
_instruction
(
ins
,
make_op
(
"reduce_max"
,
{{
"axes"
,
{
1
}
}}),
reshape
);
m
.
replace
_instruction
(
ins
,
make_op
(
"reduce_max"
,
{{
"axes"
,
axes
}}),
ins
->
inputs
()
);
}
std
::
vector
<
int64_t
>
rsp_lens
(
lens
.
size
(),
1
);
rsp_lens
[
0
]
=
n
;
rsp_lens
[
1
]
=
c
;
m
.
replace_instruction
(
ins
,
make_op
(
"reshape"
,
{{
"dims"
,
rsp_lens
}}),
pooling
);
}
}
...
...
src/rewrite_quantization.cpp
View file @
b9d37172
...
...
@@ -28,6 +28,7 @@
#include <migraphx/tune_axis.hpp>
#include <migraphx/program.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/common.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -61,13 +62,10 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant
=
qt
.
max
();
min_quant
=
qt
.
min
();
});
auto
s
=
add_zero_point
->
get_shape
();
std
::
vector
<
int
>
min_data
(
s
.
elements
(),
min_quant
);
std
::
vector
<
int
>
max_data
(
s
.
elements
(),
max_quant
);
auto
min_arg
=
m
.
add_literal
(
literal
(
s
,
min_data
));
auto
max_arg
=
m
.
add_literal
(
literal
(
s
,
max_data
));
auto
saturate
=
m
.
insert_instruction
(
ins
,
make_op
(
"clip"
),
add_zero_point
,
min_arg
,
max_arg
);
auto
s
=
add_zero_point
->
get_shape
();
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
}
...
...
src/shape.cpp
View file @
b9d37172
...
...
@@ -50,13 +50,14 @@ struct shape_impl
{
assert
(
t
!=
shape
::
tuple_type
);
}
shape_impl
(
shape
::
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
)
:
m_type
(
t
),
m_lens
(
std
::
move
(
l
)),
m_standard
(
true
)
{
assert
(
t
!=
shape
::
tuple_type
);
this
->
calculate_strides
();
assert
(
m_lens
.
size
()
==
m_strides
.
size
());
}
shape_impl
(
shape
::
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
,
std
::
vector
<
std
::
size_t
>
s
)
:
m_type
(
t
),
m_lens
(
std
::
move
(
l
)),
m_strides
(
std
::
move
(
s
))
{
...
...
@@ -151,6 +152,22 @@ struct shape_impl
m_lens
.
begin
(),
m_lens
.
end
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
}
std
::
size_t
get_index
(
size_t
i
)
const
{
std
::
size_t
result
=
0
;
std
::
size_t
s
=
1
;
for
(
auto
k
:
migraphx
::
reverse
(
migraphx
::
range
(
m_lens
.
size
())))
{
std
::
size_t
stride
=
m_strides
[
k
];
std
::
size_t
len
=
m_lens
[
k
];
std
::
size_t
idx
=
(
i
%
(
s
*
len
))
/
s
;
result
+=
stride
*
idx
;
s
*=
len
;
}
return
result
;
}
std
::
vector
<
std
::
size_t
>
min_lens
()
const
{
std
::
vector
<
std
::
size_t
>
ret
(
m_dyn_dims
.
size
());
...
...
@@ -213,6 +230,7 @@ std::string shape::name(shape::type_t t)
}
MIGRAPHX_THROW
(
"Invalid type"
);
}
std
::
string
shape
::
cpp_type
(
shape
::
type_t
t
)
{
switch
(
t
)
...
...
@@ -229,10 +247,12 @@ std::string shape::cpp_type(shape::type_t t)
shape
::
shape
()
:
impl
(
shape_impl
::
default_shape
())
{}
shape
::
shape
(
type_t
t
)
:
impl
(
std
::
make_shared
<
shape_impl
>
(
t
))
{}
shape
::
shape
(
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
)
:
impl
(
std
::
make_shared
<
shape_impl
>
(
t
,
std
::
move
(
l
)))
{
}
shape
::
shape
(
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
,
std
::
vector
<
std
::
size_t
>
s
)
:
impl
(
std
::
make_shared
<
shape_impl
>
(
t
,
std
::
move
(
l
),
std
::
move
(
s
)))
{
...
...
@@ -358,21 +378,8 @@ std::size_t shape::index(std::size_t i) const
assert
(
this
->
lens
().
size
()
==
this
->
strides
().
size
());
if
(
this
->
standard
())
return
i
;
else
{
std
::
size_t
s
=
1
;
std
::
size_t
result
=
0
;
for
(
std
::
size_t
j
=
0
;
j
<
this
->
lens
().
size
();
j
++
)
{
const
std
::
size_t
k
=
this
->
lens
().
size
()
-
j
-
1
;
const
std
::
size_t
stride
=
this
->
strides
()[
k
];
const
std
::
size_t
len
=
this
->
lens
()[
k
];
const
std
::
size_t
idx
=
(
i
%
(
s
*
len
))
/
s
;
result
+=
stride
*
idx
;
s
*=
len
;
}
return
result
;
}
return
impl
->
get_index
(
i
);
}
std
::
vector
<
std
::
size_t
>
shape
::
multi
(
std
::
size_t
idx
)
const
...
...
src/simplify_algebra.cpp
View file @
b9d37172
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -521,6 +521,27 @@ struct find_inner_broadcast
})
<
(
lens
.
size
()
-
1
);
}))
return
;
if
(
broadcasts
.
size
()
>
1
)
{
auto
bcast_strides
=
broadcasts
.
front
()
->
get_shape
().
strides
().
size
();
std
::
vector
<
size_t
>
common_axis
(
bcast_strides
,
0
);
// go through the strides of each broadcast,
// keep track of values that are equal to 0 in a dimension
for
(
auto
i
=
0
;
i
<
bcast_strides
;
i
++
)
{
for
(
const
auto
&
broadcast
:
broadcasts
)
{
if
(
broadcast
->
get_shape
().
strides
()[
i
]
==
0
)
common_axis
[
i
]
++
;
}
}
// if no common broadcast axis, transformation is not useful
if
(
std
::
find_if
(
common_axis
.
begin
(),
common_axis
.
end
(),
[](
auto
num_common
)
{
return
num_common
>
1
;
})
==
common_axis
.
end
())
return
;
}
std
::
vector
<
instruction_ref
>
inputs
;
std
::
transform
(
broadcasts
.
begin
(),
broadcasts
.
end
(),
...
...
@@ -1106,8 +1127,9 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins)
};
};
auto
dots
=
std
::
count_if
(
ins
->
outputs
().
begin
(),
ins
->
outputs
().
end
(),
pred
(
"dot"
));
auto
qdots
=
std
::
count_if
(
ins
->
outputs
().
begin
(),
ins
->
outputs
().
end
(),
pred
(
"quant_dot"
));
auto
convs
=
std
::
count_if
(
ins
->
outputs
().
begin
(),
ins
->
outputs
().
end
(),
pred
(
"convolution"
));
return
(
dots
>=
2
or
convs
>=
2
);
return
(
dots
>=
2
or
convs
>=
2
or
qdots
>=
2
);
}
struct
find_conv_dot_horiz_fusion
...
...
@@ -1121,7 +1143,7 @@ struct find_conv_dot_horiz_fusion
auto
pred
=
[](
auto
i
,
auto
j
)
{
if
(
i
->
get_operator
()
!=
j
->
get_operator
())
return
false
;
if
(
not
contains
({
"dot"
,
"convolution"
},
i
->
name
()))
if
(
not
contains
({
"quant_dot"
,
"dot"
,
"convolution"
},
i
->
name
()))
return
true
;
auto
x
=
i
->
inputs
()[
1
]
->
get_shape
().
lens
();
auto
y
=
j
->
inputs
()[
1
]
->
get_shape
().
lens
();
...
...
@@ -1129,7 +1151,7 @@ struct find_conv_dot_horiz_fusion
return
false
;
// Check that non-axes match
int
axis
=
1
;
if
(
i
->
name
()
==
"dot"
)
if
(
i
->
name
()
==
"dot"
or
i
->
name
()
==
"quant_dot"
)
{
axis
=
x
.
size
()
-
1
;
}
...
...
@@ -1140,7 +1162,7 @@ struct find_conv_dot_horiz_fusion
if
(
std
::
distance
(
start
,
last
)
<
2
)
return
;
auto
&&
name
=
(
*
start
)
->
name
();
if
(
not
contains
({
"dot"
,
"convolution"
},
name
))
if
(
not
contains
({
"quant_dot"
,
"dot"
,
"convolution"
},
name
))
return
;
auto
op
=
(
*
start
)
->
get_operator
();
int
group
=
1
;
...
...
@@ -1155,7 +1177,7 @@ struct find_conv_dot_horiz_fusion
start
,
last
,
std
::
back_inserter
(
args
),
[
&
](
auto
x
)
{
return
x
->
inputs
().
at
(
1
);
});
int
axis
=
1
;
int
concat_axis
=
0
;
if
(
name
==
"dot"
)
if
(
name
==
"dot"
or
name
==
"quant_dot"
)
{
axis
=
int
(
args
.
front
()
->
get_shape
().
lens
().
size
()
-
1
);
concat_axis
=
axis
;
...
...
@@ -1335,48 +1357,59 @@ struct find_split_reshape
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
slc
=
r
.
instructions
[
"slice"
];
auto
rsp
=
r
.
instructions
[
"reshape"
];
auto
slc
=
r
.
instructions
[
"slice"
];
auto
rsp
=
r
.
instructions
[
"reshape"
];
auto
input
=
slc
->
inputs
().
front
();
// Only apply simplification when slices are on a single axis
auto
axes
=
any_cast
<
op
::
slice
>
(
slc
->
get_operator
()).
axes
;
if
(
axes
.
size
()
>
1
)
{
return
;
}
auto
input
=
slc
->
inputs
().
front
();
auto
split_outputs
=
get_splits
(
input
);
if
(
split_outputs
.
empty
())
{
return
;
}
// Only want to apply this optimization if each split output is followed by
// a contiguous op and a reshape
if
(
std
::
any_of
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
[](
auto
i
)
{
if
(
i
->
outputs
().
size
()
==
1
)
{
auto
cont
=
i
->
outputs
().
front
();
return
cont
->
outputs
().
size
()
!=
1
;
}
return
false
;
}))
// Find all the reshapes (similar to rsp) that can be simplified
std
::
vector
<
instruction_ref
>
conts
;
std
::
vector
<
instruction_ref
>
vec_rsp
;
// Iterate through slice and contiguous outputs to allow simplifications when
// slice is followed by multiple reshapes
for
(
auto
&
i
:
split_outputs
)
{
return
;
std
::
copy_if
(
i
->
outputs
().
begin
(),
i
->
outputs
().
end
(),
std
::
back_inserter
(
conts
),
[](
auto
j
)
{
return
j
->
name
()
==
"contiguous"
;
});
}
std
::
vector
<
instruction_ref
>
vec_rsp
(
split_outputs
.
size
());
std
::
transform
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
vec_rsp
.
begin
(),
[](
auto
i
)
{
auto
cont
=
i
->
outputs
().
front
();
return
cont
->
outputs
().
front
();
});
for
(
auto
&
i
:
conts
)
{
std
::
copy_if
(
i
->
outputs
().
begin
(),
i
->
outputs
().
end
(),
std
::
back_inserter
(
vec_rsp
),
[
&
](
auto
j
)
{
return
j
->
get_operator
()
==
rsp
->
get_operator
();
});
}
// all outputs are reshape and of the same shape
auto
dims
=
any_cast
<
op
::
reshape
>
(
rsp
->
get_operator
()).
dims
;
if
(
not
same_ops
(
vec_rsp
))
// No simplification needed if there is only one slice -> cont -> reshape
if
(
vec_rsp
.
size
()
<=
1
)
{
return
;
}
// ensure reshape happens after the axis dimension
auto
axis
=
any_cast
<
op
::
slice
>
(
slc
->
get_operator
()).
axes
[
0
];
auto
axis
=
axes
[
0
];
auto
slc_lens
=
slc
->
get_shape
().
lens
();
auto
slc_dim_size
=
std
::
accumulate
(
slc_lens
.
begin
()
+
axis
,
slc_lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
auto
input_lens
=
input
->
get_shape
().
lens
();
auto
input_size
=
input
->
get_shape
().
elements
();
auto
slc_axis_len
=
input_lens
[
axis
];
// search the reshape output (standard shape) to decide which axis are
// in its output corresponding to the slc_dim_size
...
...
@@ -1403,16 +1436,67 @@ struct find_split_reshape
{
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
);
}
// calculate reshape output shape
std
::
vector
<
int64_t
>
vec_dims
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
vec_dims
.
begin
(),
[
&
](
auto
is
)
{
return
is
->
get_shape
().
lens
()[
rsp_axis
];
});
// Calculate reshape output shape
// Need to find a reshape such that data represented by instructions in vec_rsp can be
// written as slices of this new reshape. This is done by holding all the dims constant in
// rsp_lens to compute the required dim for rsp_axis (axis that will be sliced)
// ex 1: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 2, 2, 4}, {2, 2, 2, 4}, {2, 2, 2, 4}
// rsp_axis = 1, rsp_out_lens (initial) = {2, 1, 2, 4}, rsp_fixed_size = 2*1*2*4 = 16
// rsp_axis_len = 2*12*4 / 16 = 6
// rsp_out_lens (final) = {2, 6, 2, 4}
// ex 2: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 16}, {2, 16}, {2, 16}
// rsp_axis = 1, rsp_out_lens (initial) = {2, 1}, rsp_fixed_size = 2*1 = 2
// rsp_axis_len = 2*12*4 / 2 = 48
// rsp_out_lens (final) = {2, 48}
std
::
vector
<
int64_t
>
rsp_out_lens
(
rsp_lens
.
begin
(),
rsp_lens
.
end
());
rsp_out_lens
[
rsp_axis
]
=
1
;
auto
rsp_fixed_size
=
std
::
accumulate
(
rsp_out_lens
.
begin
(),
rsp_out_lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
rsp_out_lens
[
rsp_axis
]
=
std
::
accumulate
(
vec_dims
.
begin
(),
vec_dims
.
end
(),
std
::
int64_t
{
0
});
// cannot create a valid reshape for simplification
if
(
input_size
%
rsp_fixed_size
!=
0
)
{
return
;
}
auto
rsp_axis_len
=
input_size
/
rsp_fixed_size
;
rsp_out_lens
[
rsp_axis
]
=
rsp_axis_len
;
// Calculate new slice start and end indices. Indices are scaled using the new reshape axis
// and the original slice axis. See examples:
// ex 1: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 2, 2, 4}, {2, 2, 2, 4}, {2, 2, 2, 4}
// slc_axis_len = 12, rsp_axis_len = 6
// New Starts: {0*6/12, 4*6/12, 8*6/12} = {0, 2, 4}
// New Ends: {4*6/12, 8*6/12, 12*6/12} = {2, 4, 6}
// ex 2: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 16}, {2, 16}, {2, 16}
// slc_axis_len = 12, rsp_axis_len = 48
// New Starts: {0*48/12, 4*48/12, 8*48/12} = { 0, 16, 32}
// New Ends: {4*48/12, 8*48/12, 12*48/12} = {16, 32, 48}
std
::
vector
<
int64_t
>
new_starts
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
new_starts
.
begin
(),
[
&
](
auto
is
)
{
auto
cont
=
is
->
inputs
().
front
();
auto
og_slc
=
cont
->
inputs
().
front
();
return
any_cast
<
op
::
slice
>
(
og_slc
->
get_operator
()).
starts
[
0
]
*
rsp_axis_len
/
slc_axis_len
;
});
std
::
vector
<
int64_t
>
new_ends
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
new_ends
.
begin
(),
[
&
](
auto
is
)
{
auto
cont
=
is
->
inputs
().
front
();
auto
og_slc
=
cont
->
inputs
().
front
();
return
any_cast
<
op
::
slice
>
(
og_slc
->
get_operator
()).
ends
[
0
]
*
rsp_axis_len
/
slc_axis_len
;
});
// insert the reshape instruction and add contiguous if needed
if
(
not
input
->
get_shape
().
standard
())
...
...
@@ -1423,16 +1507,14 @@ struct find_split_reshape
std
::
next
(
input
),
make_op
(
"reshape"
,
{{
"dims"
,
rsp_out_lens
}}),
input
);
// replace the original reshape with slice
int64_t
start
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
vec_rsp
.
size
();
++
i
)
{
m
.
replace_instruction
(
vec_rsp
[
i
],
make_op
(
"slice"
,
{{
"axes"
,
{
rsp_axis
}},
{
"starts"
,
{
start
}},
{
"ends"
,
{
start
+
vec_dim
s
[
i
]}}}),
{{
"axes"
,
{
rsp_axis
}},
{
"starts"
,
{
new_
start
s
[
i
]
}},
{
"ends"
,
{
new_end
s
[
i
]}}}),
rsp_ins
);
start
+=
vec_dims
[
i
];
}
}
};
...
...
@@ -1456,10 +1538,13 @@ struct find_split_transpose
{
return
;
}
if
(
std
::
any_of
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
[](
auto
i
)
{
return
i
->
outputs
().
size
()
!=
1
;
}))
return
;
std
::
vector
<
instruction_ref
>
vec_trans
(
split_outputs
.
size
());
std
::
transform
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
vec_trans
.
begin
(),
[](
auto
i
)
{
assert
(
i
->
outputs
().
size
()
==
1
);
return
i
->
outputs
().
front
();
});
...
...
src/simplify_dyn_ops.cpp
0 → 100644
View file @
b9d37172
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/make_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
/**
* Convert 2 input static shape broadcast/multibroadcast into 1 input version.
* Some compiler passes (ex. simplify_algebra) only support the 1 input versions
* of the broadcasting operators.
*/
struct
find_static_2in_broadcasts
{
auto
matcher
()
const
{
return
match
::
broadcast
(
match
::
nargs
(
2
),
match
::
arg
(
0
)(
match
::
static_shape
()),
match
::
arg
(
1
)(
match
::
static_shape
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
out_lens
=
ins
->
get_shape
().
lens
();
auto
broadcast_op
=
ins
->
get_operator
();
if
(
broadcast_op
.
name
()
==
"broadcast"
)
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
}});
}
else
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
},
{
"out_dyn_dims"
,
{}}});
}
m
.
replace_instruction
(
ins
,
broadcast_op
,
ins
->
inputs
().
at
(
0
));
}
};
/**
* Simplify slice with variable `starts` and `ends` to the constant version if
* the `input_starts` and `input_ends` inputs are constant.
*/
struct
find_const_3in_slice
{
auto
matcher
()
const
{
return
match
::
name
(
"slice"
)(
match
::
nargs
(
3
),
match
::
arg
(
1
)(
match
::
is_constant
()),
match
::
arg
(
2
)(
match
::
is_constant
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
inputs
=
ins
->
inputs
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
();
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
();
if
(
not
starts_arg
.
empty
()
and
not
ends_arg
.
empty
())
{
std
::
vector
<
int64_t
>
starts_vec
;
std
::
vector
<
int64_t
>
ends_vec
;
starts_arg
.
visit
([
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
ends_arg
.
visit
([
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
auto
slice_val
=
ins
->
get_operator
().
to_value
();
auto
axes_vec
=
slice_val
.
at
(
"axes"
).
to_vector
<
int64_t
>
();
m
.
replace_instruction
(
ins
,
make_op
(
"slice"
,
{{
"starts"
,
starts_vec
},
{
"ends"
,
ends_vec
},
{
"axes"
,
axes_vec
}}),
inputs
.
at
(
0
));
}
}
};
/**
* Simplify slice with variable `starts`, `ends`, and `input_axes` to the constant version if
* the `input_starts`, `input_ends`, and `input_axes` inputs are constant.
*/
struct
find_const_4in_slice
{
auto
matcher
()
const
{
return
match
::
name
(
"slice"
)(
match
::
nargs
(
4
),
match
::
arg
(
1
)(
match
::
is_constant
()),
match
::
arg
(
2
)(
match
::
is_constant
()),
match
::
arg
(
3
)(
match
::
is_constant
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
inputs
=
ins
->
inputs
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
();
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
();
argument
axes_arg
=
inputs
.
at
(
3
)
->
eval
();
if
(
not
starts_arg
.
empty
()
and
not
ends_arg
.
empty
()
and
not
axes_arg
.
empty
())
{
std
::
vector
<
int64_t
>
starts_vec
;
std
::
vector
<
int64_t
>
ends_vec
;
std
::
vector
<
int64_t
>
axes_vec
;
starts_arg
.
visit
([
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
ends_arg
.
visit
([
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
axes_arg
.
visit
([
&
](
auto
output
)
{
axes_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
m
.
replace_instruction
(
ins
,
make_op
(
"slice"
,
{{
"starts"
,
starts_vec
},
{
"ends"
,
ends_vec
},
{
"axes"
,
axes_vec
}}),
inputs
.
at
(
0
));
}
}
};
void
simplify_dyn_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_static_2in_broadcasts
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/simplify_reshapes.cpp
View file @
b9d37172
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -122,6 +122,11 @@ struct find_nop_reshapes
reshapes
.
insert
(
"pad"
);
reshapes
.
insert
(
"slice"
);
reshapes
.
insert
(
"transpose"
);
reshapes
.
insert
(
"reduce_mean"
);
reshapes
.
insert
(
"reduce_max"
);
reshapes
.
insert
(
"reduce_min"
);
reshapes
.
insert
(
"reduce_sum"
);
reshapes
.
insert
(
"reduce_prod"
);
return
match
::
name
(
reshapes
)(
match
::
same_shape
(
match
::
arg
(
0
)));
}
...
...
@@ -627,6 +632,46 @@ struct find_transpose_contiguous_reshaper_unary
}
};
// simplifies broadcast->transpose to transpose->broadcast
// in the case of a scalar, simply rewrite to broadcast
// this can allow for further optimizations with find_inner_broadcast() in simplify_algebra.cpp
struct
find_broadcast_transpose
{
auto
matcher
()
const
{
return
match
::
name
(
"transpose"
)(
match
::
arg
(
0
)(
match
::
name
(
"multibroadcast"
).
bind
(
"bcast_ins"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
transpose
=
r
.
result
;
auto
transpose_lens
=
transpose
->
get_shape
().
lens
();
auto
bcast_ins
=
r
.
instructions
[
"bcast_ins"
];
auto
input
=
bcast_ins
->
inputs
().
front
();
// scalar transformation does not need extra transpose
if
(
not
input
->
get_shape
().
scalar
())
{
// find common shape
auto
in_lens
=
input
->
get_shape
().
lens
();
int
lens_diff
=
transpose_lens
.
size
()
-
in_lens
.
size
();
// insert unsqueeze if input lens < transpose lens
if
(
lens_diff
>
0
)
{
std
::
vector
<
size_t
>
unsqueeze_axes
(
lens_diff
);
std
::
iota
(
unsqueeze_axes
.
begin
(),
unsqueeze_axes
.
end
(),
0
);
input
=
m
.
insert_instruction
(
bcast_ins
,
make_op
(
"unsqueeze"
,
{{
"axes"
,
unsqueeze_axes
}}),
input
);
}
// apply transpose before the multibroadcast
input
=
m
.
insert_instruction
(
bcast_ins
,
transpose
->
get_operator
(),
input
);
}
auto
new_mbcast
=
m
.
insert_instruction
(
bcast_ins
,
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
transpose_lens
}}),
input
);
m
.
replace_instruction
(
transpose
,
new_mbcast
);
}
};
struct
find_slice_transpose
{
auto
matcher
()
const
...
...
@@ -784,7 +829,7 @@ struct find_transpose_slice
void
simplify_reshapes
::
apply
(
module
&
m
)
const
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
{
match
::
find_matches
(
m
,
find_where_op
{},
...
...
@@ -799,6 +844,7 @@ void simplify_reshapes::apply(module& m) const
find_nested_slice
{},
find_nested_concat
{},
find_transpose_slice
{},
find_broadcast_transpose
{},
find_slice_transpose
{},
find_transpose_contiguous_reshaper_unary
{});
dead_code_elimination
{}.
apply
(
m
);
...
...
src/split_single_dyn_dim.cpp
View file @
b9d37172
...
...
@@ -68,37 +68,6 @@ has_one_dyn_dim(const std::unordered_map<std::string, shape>& param_shapes)
dds_it
->
max
};
}
namespace
{
struct
find_static_2in_broadcasts
{
// Convert 2 input static shape broadcast/multibroadcast into 1 input version.
// Some compiler passes (ex. simplify_algebra) only support the 1 input versions
// of the broadcasting operators.
auto
matcher
()
const
{
return
match
::
broadcast
(
match
::
nargs
(
2
),
match
::
arg
(
0
)(
match
::
static_shape
()),
match
::
arg
(
1
)(
match
::
static_shape
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
out_lens
=
ins
->
get_shape
().
lens
();
auto
broadcast_op
=
ins
->
get_operator
();
if
(
broadcast_op
.
name
()
==
"broadcast"
)
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
}});
}
else
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
},
{
"out_dyn_dims"
,
{}}});
}
m
.
replace_instruction
(
ins
,
broadcast_op
,
ins
->
inputs
().
at
(
0
));
}
};
}
// namespace
/**
* Makes all the shapes in the dynamic_dimension range. Probably won't work for `if`
* and `loop` instructions, depending on how the submodules for those
...
...
@@ -135,7 +104,6 @@ void split_single_dyn_dim::apply(module_pass_manager& mpm) const
dd_check
->
dyn_param_str
,
migraphx
::
shape
{
dyn_param_shape
.
type
(),
static_lens
});
auto
outputs
=
submod
->
add_instructions
(
mm
,
map_ins
);
submod
->
add_return
({
outputs
});
match
::
find_matches
(
*
submod
,
find_static_2in_broadcasts
{});
submodules
.
push_back
(
submod
);
}
// redirect to select_module operator and return
...
...
src/sqlite.cpp
View file @
b9d37172
...
...
@@ -48,6 +48,7 @@ struct sqlite_impl
template
<
class
F
>
void
exec
(
const
char
*
sql
,
F
f
)
{
// cppcheck-suppress constParameterPointer
auto
callback
=
[](
void
*
obj
,
auto
...
xs
)
->
int
{
try
{
...
...
src/targets/cpu/gemm.cpp
View file @
b9d37172
...
...
@@ -43,7 +43,11 @@ struct dnnl_gemm : dnnl_extend_op<dnnl_gemm, dnnl::matmul, op::dot>
MIGRAPHX_DNNL_PREFIX
(
ARG_BIAS
)};
}
void
required
(
const
check_shapes
&
cs
)
const
{
cs
.
not_broadcasted
();
}
template
<
class
T
>
void
required
(
const
check_shapes
<
T
>&
cs
)
const
{
cs
.
not_broadcasted
();
}
dnnl
::
matmul
::
desc
get_desc
(
const
std
::
unordered_map
<
int
,
dnnl
::
memory
::
desc
>&
m
)
const
{
...
...
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
View file @
b9d37172
...
...
@@ -400,7 +400,11 @@ struct dnnl_extend_op : dnnl_op<Derived, Primitive>
}
// dnnl has some issues with non-packed inputs
void
required
(
const
check_shapes
&
cs
)
const
{
cs
.
packed_or_broadcasted
();
}
template
<
class
T
>
void
required
(
const
check_shapes
<
T
>&
cs
)
const
{
cs
.
packed_or_broadcasted
();
}
std
::
string
name
()
const
{
return
"dnnl::"
+
op
.
name
();
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
...
...
src/targets/cpu/lowering.cpp
View file @
b9d37172
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
src/targets/cpu/target.cpp
View file @
b9d37172
...
...
@@ -61,7 +61,7 @@ namespace cpu {
std
::
string
target
::
name
()
const
{
return
"cpu"
;
}
// cppcheck-suppress constParameter
// cppcheck-suppress constParameter
Reference
std
::
vector
<
pass
>
target
::
get_passes
(
migraphx
::
context
&
gctx
,
const
compile_options
&
)
const
{
auto
&
ctx
=
any_cast
<
context
>
(
gctx
);
...
...
src/targets/gpu/CMakeLists.txt
View file @
b9d37172
...
...
@@ -23,6 +23,10 @@
# ####################################################################################
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
find_package
(
hip
)
if
(
NOT GPU_TARGETS
)
message
(
FATAL_ERROR
"HIP package is broken and has no GPU_TARGETS, please pass -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*') to cmake to build for your gpu."
)
endif
()
find_package
(
miopen
)
# rocblas
...
...
@@ -50,6 +54,7 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
add_embed_library
(
migraphx_kernels
${
KERNEL_FILES
}
RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/
)
configure_file
(
device/targets.hpp.in include/migraphx/gpu/device/targets.hpp
)
file
(
GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/*.cpp
)
add_library
(
migraphx_device
${
DEVICE_GPU_SRCS
}
)
...
...
@@ -69,6 +74,7 @@ rocm_clang_tidy_check(migraphx_device)
target_link_libraries
(
migraphx_device PUBLIC migraphx
)
target_link_libraries
(
migraphx_device PRIVATE compile_for_gpu
)
target_include_directories
(
migraphx_device PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
target_include_directories
(
migraphx_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_BINAR_DIR
}
/include>
)
target_include_directories
(
migraphx_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
target_compile_options
(
migraphx_device PRIVATE -Wno-ignored-attributes
)
migraphx_generate_export_header
(
migraphx_device DIRECTORY migraphx/gpu/device
)
...
...
@@ -123,6 +129,7 @@ add_library(migraphx_gpu
lrn.cpp
mlir.cpp
multinomial.cpp
no_device.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
...
...
@@ -191,7 +198,7 @@ register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_clang_tidy_check
(
migraphx_gpu
)
set
(
MIGRAPHX_ENABLE_MLIR O
FF
CACHE BOOL
""
)
set
(
MIGRAPHX_ENABLE_MLIR O
N
CACHE BOOL
""
)
if
(
MIGRAPHX_ENABLE_MLIR
)
# Find package rocMLIR
...
...
src/targets/gpu/compile_gen.cpp
View file @
b9d37172
...
...
@@ -331,7 +331,7 @@ static std::vector<std::string> get_op_names(const module& m)
{
if
(
starts_with
(
ins
.
name
(),
"@"
))
continue
;
if
(
ins
.
name
()
==
"multibroadcast"
)
if
(
contains
({
"multibroadcast"
,
"contiguous"
},
ins
.
name
())
)
continue
;
if
(
ins
.
name
()
==
"pointwise"
)
{
...
...
src/targets/gpu/compile_hip.cpp
View file @
b9d37172
...
...
@@ -28,6 +28,7 @@
#include <migraphx/env.hpp>
#include <cassert>
#include <iostream>
#include <deque>
#ifdef MIGRAPHX_USE_HIPRTC
#include <hip/hiprtc.h>
...
...
@@ -92,7 +93,7 @@ struct hiprtc_program
{
struct
string_array
{
std
::
vector
<
std
::
string
>
strings
{};
std
::
deque
<
std
::
string
>
strings
{};
std
::
vector
<
const
char
*>
c_strs
{};
string_array
()
{}
...
...
@@ -115,6 +116,12 @@ struct hiprtc_program
std
::
string
cpp_src
=
""
;
std
::
string
cpp_name
=
""
;
hiprtc_program
(
const
std
::
string
&
src
,
const
std
::
string
&
name
=
"main.cpp"
)
:
cpp_src
(
src
),
cpp_name
(
name
)
{
create_program
();
}
hiprtc_program
(
std
::
vector
<
hiprtc_src_file
>
srcs
)
{
for
(
auto
&&
src
:
srcs
)
...
...
@@ -130,6 +137,14 @@ struct hiprtc_program
include_names
.
push_back
(
std
::
move
(
src
.
path
));
}
}
create_program
();
}
void
create_program
()
{
assert
(
not
cpp_src
.
empty
());
assert
(
not
cpp_name
.
empty
());
assert
(
headers
.
size
()
==
include_names
.
size
());
prog
=
hiprtc_program_create
(
cpp_src
.
c_str
(),
cpp_name
.
c_str
(),
headers
.
size
(),
...
...
@@ -137,7 +152,7 @@ struct hiprtc_program
include_names
.
data
());
}
void
compile
(
const
std
::
vector
<
std
::
string
>&
options
)
const
void
compile
(
const
std
::
vector
<
std
::
string
>&
options
,
bool
quiet
=
false
)
const
{
if
(
enabled
(
MIGRAPHX_TRACE_HIPRTC
{}))
std
::
cout
<<
"hiprtc "
<<
join_strings
(
options
,
" "
)
<<
" "
<<
cpp_name
<<
std
::
endl
;
...
...
@@ -148,7 +163,7 @@ struct hiprtc_program
[](
const
std
::
string
&
s
)
{
return
s
.
c_str
();
});
auto
result
=
hiprtcCompileProgram
(
prog
.
get
(),
c_options
.
size
(),
c_options
.
data
());
auto
prog_log
=
log
();
if
(
not
prog_log
.
empty
())
if
(
not
prog_log
.
empty
()
and
not
quiet
)
{
std
::
cerr
<<
prog_log
<<
std
::
endl
;
}
...
...
@@ -195,7 +210,6 @@ std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_sr
options
.
push_back
(
"-Wno-gnu-line-marker"
);
options
.
push_back
(
"-Wno-old-style-cast"
);
}
if
(
enabled
(
MIGRAPHX_GPU_DEBUG
{}))
options
.
push_back
(
"-DMIGRAPHX_DEBUG"
);
if
(
std
::
none_of
(
options
.
begin
(),
options
.
end
(),
[](
const
std
::
string
&
s
)
{
...
...
@@ -210,6 +224,20 @@ std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_sr
return
{
prog
.
get_code_obj
()};
}
bool
hip_has_flags
(
const
std
::
vector
<
std
::
string
>&
flags
)
{
hiprtc_program
prog
{
" "
};
try
{
prog
.
compile
(
flags
,
true
);
return
true
;
}
catch
(...)
{
return
false
;
}
}
std
::
vector
<
std
::
vector
<
char
>>
compile_hip_src
(
const
std
::
vector
<
src_file
>&
srcs
,
std
::
string
params
,
const
std
::
string
&
arch
)
{
...
...
@@ -323,6 +351,29 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
return
{
compiler
.
compile
(
srcs
)};
}
bool
hip_has_flags
(
const
std
::
vector
<
std
::
string
>&
flags
)
{
src_compiler
compiler
;
compiler
.
compiler
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
);
compiler
.
flags
=
join_strings
(
flags
,
" "
)
+
" -x hip -c --offload-arch=gfx900 --cuda-device-only"
;
std
::
string
src
;
src_file
input
;
input
.
path
=
"main.cpp"
;
input
.
content
=
std
::
make_pair
(
src
.
data
(),
src
.
data
()
+
src
.
size
());
try
{
compiler
.
compile
({
input
});
return
true
;
}
catch
(...)
{
return
false
;
}
}
#endif // MIGRAPHX_USE_HIPRTC
std
::
string
enum_params
(
std
::
size_t
count
,
std
::
string
param
)
...
...
src/targets/gpu/compile_hip_code_object.cpp
View file @
b9d37172
...
...
@@ -91,28 +91,39 @@ __content__
return
replace_string
(
args_hpp
,
"__content__"
,
inner
);
}
static
std
::
vector
<
std
::
string
>
get_compiler_warnings
()
{
std
::
vector
<
std
::
string
>
warnings
=
{
"-Weverything"
,
"-Wno-c++98-compat"
,
"-Wno-c++98-compat-pedantic"
,
"-Wno-conversion"
,
"-Wno-double-promotion"
,
"-Wno-exit-time-destructors"
,
"-Wno-extra-semi"
,
"-Wno-extra-semi-stmt"
,
"-Wno-float-conversion"
,
"-Wno-gnu-anonymous-struct"
,
"-Wno-gnu-zero-variadic-macro-arguments"
,
"-Wno-missing-prototypes"
,
"-Wno-nested-anon-types"
,
"-Wno-padded"
,
"-Wno-shorten-64-to-32"
,
"-Wno-sign-conversion"
,
"-Wno-sign-compare"
,
"-Wno-unused-command-line-argument"
,
"-Wno-weak-vtables"
,
"-Wno-c99-extensions"
,
};
if
(
hip_has_flags
({
"-Werror"
,
"-Wunsafe-buffer-usage"
}))
warnings
.
push_back
(
"-Wno-unsafe-buffer-usage"
);
return
warnings
;
}
const
std
::
vector
<
std
::
string
>&
compiler_warnings
()
{
static
std
::
vector
<
std
::
string
>
warnings
=
{
"-Weverything"
,
"-Wno-c++98-compat"
,
"-Wno-c++98-compat-pedantic"
,
"-Wno-conversion"
,
"-Wno-double-promotion"
,
"-Wno-exit-time-destructors"
,
"-Wno-extra-semi"
,
"-Wno-extra-semi-stmt"
,
"-Wno-float-conversion"
,
"-Wno-gnu-anonymous-struct"
,
"-Wno-gnu-zero-variadic-macro-arguments"
,
"-Wno-missing-prototypes"
,
"-Wno-nested-anon-types"
,
"-Wno-padded"
,
"-Wno-shorten-64-to-32"
,
"-Wno-sign-conversion"
,
"-Wno-sign-compare"
,
"-Wno-unused-command-line-argument"
,
"-Wno-weak-vtables"
,
"-Wno-c99-extensions"
};
static
std
::
vector
<
std
::
string
>
warnings
=
get_compiler_warnings
();
return
warnings
;
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
View file @
b9d37172
...
...
@@ -26,7 +26,9 @@
#include <hip/hip_runtime.h>
#include <migraphx/config.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/targets.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -41,7 +43,7 @@ struct index
__device__
index_int
nglobal
()
const
{
return
blockDim
.
x
*
gridDim
.
x
;
}
// NOLINT
__device__
index_int
nlocal
()
const
{
return
blockDim
.
x
;
}
// NOLINT
__device__
index_int
nlocal
()
const
{
return
blockDim
.
x
;
}
// NOLINT
template
<
class
F
>
__device__
void
global_stride
(
index_int
n
,
F
f
)
const
...
...
@@ -79,8 +81,29 @@ inline auto launch(hipStream_t stream, index_int global, index_int local)
using
f_type
=
decltype
(
f
);
dim3
nblocks
(
global
/
local
);
dim3
nthreads
(
local
);
/*
hipGetLastError() returns error for the first failed HIP call that happened previously.
MIGraphX calls into various backend libraries and failed HIP calls can also happen there.
Calling hipGetLastError() would reset error code to hipSuccess, so that inside MIGraphX
failed call to hipLaunchKernelGGL() can be captured.
*/
hipError_t
flush_call
=
hipGetLastError
();
(
void
)(
flush_call
);
// cppcheck-suppress UseDeviceLaunch
hipLaunchKernelGGL
((
launcher
<
f_type
>
),
nblocks
,
nthreads
,
0
,
stream
,
f
);
hipError_t
kernel_launch_status
=
hipGetLastError
();
if
(
kernel_launch_status
!=
hipSuccess
)
{
std
::
string
message
=
hipGetErrorString
(
kernel_launch_status
);
if
(
not
contains
(
get_targets
(),
get_device_name
()))
{
message
+=
". Trying to run a kernel for "
+
get_device_name
()
+
" but MIGraphX was built for targets "
+
get_targets_as_string
()
+
". Please rebuild MIGraphX with -DGPU_TARGETS='"
+
get_device_name
()
+
"'."
;
}
MIGRAPHX_THROW
(
"MIGraphX device kernel failed to launch with error: "
+
message
);
}
};
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
View file @
b9d37172
...
...
@@ -124,7 +124,7 @@ void nary_broadcast_vec_impl(
buffer
[
i
]
=
binput
.
data
()[
i
];
}
__syncthreads
();
auto
*
bp
=
as_pointer
(
buffer
);
const
auto
*
bp
=
as_pointer
(
buffer
);
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
nelements
;
i
+=
nglobal
)
{
...
...
@@ -219,7 +219,7 @@ void nary_double_broadcast_vec_impl(
buffer
[
i
+
bdim_vec_len
]
=
binput2
.
data
()[
i
];
}
__syncthreads
();
auto
*
bp
=
as_pointer
(
buffer
);
const
auto
*
bp
=
as_pointer
(
buffer
);
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
nelements
;
i
+=
nglobal
)
{
...
...
Prev
1
2
3
4
5
6
7
8
9
10
…
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment