gaoqiong / MIGraphX · Commit 3a4d36cf

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_model_test

Authored Sep 30, 2022 by charlie
Parents: 6bec381f, e19f78ae

384 files changed in the merge; this page shows 20 changed files with 208 additions and 131 deletions (+208 -131).
Files shown on this page:

src/targets/ref/lowering.cpp           +0  -1
src/tf/parse_conv.cpp                  +1  -1
src/tf/parse_depthwiseconv.cpp         +1  -1
src/tf/parse_pooling.cpp               +1  -1
src/tf/parse_relu6.cpp                 +3  -2
src/tf/tf_parser.cpp                   +3  -3
src/tmp_dir.cpp                        +1  -1
src/value.cpp                          +5  -12
test/api/test_custom_op_gpu.cpp        +1  -1
test/check_shapes_test.cpp             +1  -1
test/eval_test.cpp                     +1  -1
test/fpga/get_target_assignments.cpp   +12 -9
test/fuse_pointwise.cpp                +1  -1
test/gpu/adjust_allocation.cpp         +5  -1
test/gpu/jit.cpp                       +2  -0
test/gpu/make_precompile_op.hpp        +66 -0
test/gpu/mlir.cpp                      +2  -6
test/gpu/pack_int8_args.cpp            +84 -75
test/include/basic_ops.hpp             +5  -4
test/include/test.hpp                  +13 -10
src/targets/ref/lowering.cpp

Removes a leftover debug print from the dynamic auto-pad path.

```diff
@@ -244,7 +244,6 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
             auto weights_lens = args[1].get_shape().lens();
             std::vector<std::size_t> k_lens{weights_lens.begin() + 2, weights_lens.end()};
             padding = calc_dyn_auto_pad(img_lens, k_lens, op.stride, op.dilation);
-            std::cout << "[ ";
             output_shape =
                 compute_padded_shape({args.at(0).get_shape(), args.at(1).get_shape()}, padding);
         }
```
src/tf/parse_conv.cpp

Style change only: `||` becomes the alternative token `or` (the "asymetric" spelling is in the source string and is left as-is).

```diff
@@ -100,7 +100,7 @@ struct parse_conv : op_parser<parse_conv>
         {
             MIGRAPHX_THROW("padding should have 4 values");
         }
-        if(padding[0] != padding[2] || padding[1] != padding[3])
+        if(padding[0] != padding[2] or padding[1] != padding[3])
         {
             MIGRAPHX_THROW("migraphx does not support asymetric padding");
         }
```
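This is the first of many hunks in this commit making the same mechanical style change: the punctuation operators `!`, `&&`, and `||` become the standard alternative tokens `not`, `and`, and `or`. The two spellings are exactly equivalent in C++, as this minimal check shows:

```cpp
#include <cassert>

int main()
{
    bool a = true;
    bool b = false;
    // "not", "and", "or" are built-in alternative tokens in standard C++;
    // swapping them in changes spelling only, never behavior.
    assert((not b) == (!b));
    assert((a and b) == (a && b));
    assert((a or b) == (a || b));
}
```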
src/tf/parse_depthwiseconv.cpp

Same `||` to `or` change.

```diff
@@ -90,7 +90,7 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
     calculate_padding(0, pads, input_dims[2], op.stride[0], op.dilation[0], weight_h);
     calculate_padding(1, pads, input_dims[3], op.stride[1], op.dilation[1], weight_w);

-    if(pads[0] != pads[2] || pads[1] != pads[3])
+    if(pads[0] != pads[2] or pads[1] != pads[3])
     {
         std::vector<int64_t> padding = {0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]};
         l0 = info.add_instruction(migraphx::make_op("pad", {{"pads", padding}}), l0);
```
src/tf/parse_pooling.cpp

Same style change: `!`/`&&` become `not`/`and`.

```diff
@@ -42,7 +42,7 @@ struct parse_pooling : op_parser<parse_pooling>
                           tf_parser::node_info info,
                           std::vector<instruction_ref> args) const
     {
-        if(!starts_with(opd.tf_name, "Max") && !starts_with(opd.tf_name, "Av"))
+        if(not starts_with(opd.tf_name, "Max") and not starts_with(opd.tf_name, "Av"))
         {
             MIGRAPHX_THROW("tf pooling mode must be Max or Average");
         }
```
src/tf/parse_relu6.cpp

The clip bounds are now literals of the input's element type rather than bare float literals.

```diff
@@ -41,8 +41,9 @@ struct parse_relu6 : op_parser<parse_relu6>
                           const tf_parser::node_info& info,
                           std::vector<instruction_ref> args) const
     {
-        auto min_val = info.add_literal(0.0f);
-        auto max_val = info.add_literal(6.0f);
+        shape::type_t output_type = args[0]->get_shape().type();
+        auto min_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {0.0f}});
+        auto max_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {6.0f}});
         return info.add_common_op("clip", args[0], min_val, max_val);
     }
```
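A literal built from `0.0f` is always a float, while the incoming tensor may be half or double; constructing the literal with `output_type` presumably keeps the clip bounds in the same type as the data. A standalone sketch of the idea (our code, not MIGraphX):

```cpp
#include <cstdio>

// Bounds built from the value type T, mirroring what the new parser code
// does with output_type, so no cross-type comparison or conversion occurs.
template <class T>
T relu6(T x)
{
    const T lo = T(0);
    const T hi = T(6);
    return x < lo ? lo : (x > hi ? hi : x);
}

int main()
{
    std::printf("%g %g %g\n", relu6(-1.0), relu6(3.5), relu6(9.0)); // 0 3.5 6
}
```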
src/tf/tf_parser.cpp

An in-place truncation replaces a substr-assignment, and two negations switch to `not`.

```diff
@@ -347,7 +347,7 @@ void tf_parser::parse_node(const std::string& name)
             // input was from a node with multiple outputs
             if(contains(input_name, ':'))
             {
-                input_name = input_name.substr(0, input.find(':'));
+                input_name.resize(input.find(':'));
             }
             else
             {
@@ -371,7 +371,7 @@ void tf_parser::parse_node(const std::string& name)
         {
             result = ops[node.op()](*this, {get_attributes(node), node.op(), mm}, args);
         }
-        assert(!result.empty());
+        assert(not result.empty());
         // First output has no ":" delimiter
         instructions[name] = result.front();
         for(size_t i = 1; i < result.size(); i++)
@@ -458,7 +458,7 @@ literal tf_parser::parse_tensor(const tensorflow::TensorProto& t) const
 {
     std::vector<size_t> dims = parse_dims(t.tensor_shape());
     size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
-    if(!t.tensor_content().empty()) // has raw data
+    if(not t.tensor_content().empty()) // has raw data
     {
         const std::string& s = t.tensor_content();
         switch(t.dtype())
```
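The first hunk produces the same string either way but avoids building a temporary; note that both the old and new lines take the cut position from `input.find(':')`, exactly as the diff shows. A minimal illustration, with an explicit guard standing in for the surrounding `contains()` check:

```cpp
#include <cassert>
#include <string>

int main()
{
    std::string input_name = "conv2d:1";
    // Truncate in place at ':' instead of assigning substr(0, pos).
    // resize(npos) would throw, so the real code only reaches this after
    // a contains(input_name, ':') check; here we guard explicitly.
    auto pos = input_name.find(':');
    if(pos != std::string::npos)
        input_name.resize(pos);
    assert(input_name == "conv2d");
}
```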
src/tmp_dir.cpp

```diff
@@ -78,7 +78,7 @@ void tmp_dir::execute(const std::string& exe, const std::string& args) const
 tmp_dir::~tmp_dir()
 {
-    if(!enabled(MIGRAPHX_DEBUG_SAVE_TEMP_DIR{}))
+    if(not enabled(MIGRAPHX_DEBUG_SAVE_TEMP_DIR{}))
     {
         fs::remove_all(this->path);
     }
```
src/value.cpp

Four `!x` guards become `not x`, and the manual byte-printing body of print_value for value::binary collapses to a direct stream insertion.

```diff
@@ -400,7 +400,7 @@ std::pair<value*, bool> value::insert(const value& v)
 {
     if(v.key.empty())
     {
-        if(!x)
+        if(not x)
             x = std::make_shared<array_value_holder>();
         get_array_impl(x).push_back(v);
         assert(this->if_array());
@@ -408,7 +408,7 @@ std::pair<value*, bool> value::insert(const value& v)
     }
     else
     {
-        if(!x)
+        if(not x)
             x = std::make_shared<object_value_holder>();
         auto p = x->if_object()->emplace(v.key, get_array_impl(x).size());
         if(p.second)
@@ -420,7 +420,7 @@ std::pair<value*, bool> value::insert(const value& v)
 value* value::insert(const value* pos, const value& v)
 {
     assert(v.key.empty());
-    if(!x)
+    if(not x)
         x = std::make_shared<array_value_holder>();
     auto&& a = get_array_impl(x);
     auto it = a.insert(a.begin() + (pos - begin()), v);
@@ -466,7 +466,7 @@ bool compare(const value& x, const value& y, F f)
 value::type_t value::get_type() const
 {
-    if(!x)
+    if(not x)
         return null_type;
     return x->get_type();
 }
@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
     os << "}";
 }

-void print_value(std::ostream& os, const value::binary& x)
-{
-    // Convert binary to integers
-    std::vector<int> v(x.begin(), x.end());
-    os << "{";
-    os << to_string_range(v);
-    os << "}";
-}
+void print_value(std::ostream& os, const value::binary& x) { os << x; }

 std::ostream& operator<<(std::ostream& os, const value& d)
 {
```
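The one-liner works only if value::binary can be streamed directly, so presumably it now carries its own operator<<. What the removed body did by hand, as a standalone sketch:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::uint8_t> bytes = {1, 2, 255};
    // Widen the bytes to int so they print as numbers rather than as
    // characters; the removed body did this via std::vector<int> plus
    // to_string_range before bracketing the output.
    std::vector<int> v(bytes.begin(), bytes.end());
    std::cout << '{';
    for(std::size_t i = 0; i < v.size(); ++i)
        std::cout << (i == 0 ? "" : ", ") << v[i];
    std::cout << "}\n"; // prints {1, 2, 255}
}
```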
test/api/test_custom_op_gpu.cpp

```diff
@@ -55,7 +55,7 @@ struct simple_custom_op final : migraphx::experimental_custom_op_base
     virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
     {
-        if(!inputs[0].standard())
+        if(not inputs[0].standard())
         {
             throw std::runtime_error("first arg must be standard shaped");
         }
```
test/check_shapes_test.cpp

```diff
@@ -49,6 +49,6 @@ bool create_shapes(bool dynamic_allowed)
 TEST_CASE(allow_dynamic_shape) { EXPECT(create_shapes(true)); }
-TEST_CASE(fail_dynamic_shape) { EXPECT(!create_shapes(false)); }
+TEST_CASE(fail_dynamic_shape) { EXPECT(not create_shapes(false)); }

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
```
test/eval_test.cpp

```diff
@@ -187,7 +187,7 @@ TEST_CASE(print_test)
     std::stringstream ss;
     ss << p;
     std::string s = ss.str();
-    EXPECT(!s.empty());
+    EXPECT(not s.empty());
 }

 TEST_CASE(param_test)
```
test/get_target_assignments.cpp → test/fpga/get_target_assignments.cpp

Moved under test/fpga and retargeted: the program uses add instead of div, the test builds the fpga target explicitly, and every instruction in the main module must be assigned to it.

```diff
@@ -26,8 +26,9 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/register_target.hpp>
-#include <migraphx/ref/target.hpp>
+#include <migraphx/fpga/target.hpp>
 #include <migraphx/target_assignments.hpp>
+#include <migraphx/iterator_for.hpp>

 migraphx::program create_program()
 {
@@ -37,8 +38,8 @@ migraphx::program create_program()
     auto x = mm->add_parameter("x", s);
     auto y = mm->add_parameter("y", s);
     auto z = mm->add_parameter("z", s);
-    auto diff = mm->add_instruction(migraphx::make_op("div"), x, y);
-    mm->add_instruction(migraphx::make_op("div"), diff, z);
+    auto diff = mm->add_instruction(migraphx::make_op("add"), x, y);
+    mm->add_instruction(migraphx::make_op("add"), diff, z);
     return p;
 }
@@ -46,15 +47,17 @@ TEST_CASE(is_supported)
 {
     auto p = create_program();
     auto targets = migraphx::get_targets();
-    EXPECT(!targets.empty());
-    auto first_target = targets[0];
-    auto t = migraphx::make_target(first_target);
+    EXPECT(not targets.empty());
+    auto t = migraphx::make_target("fpga");
     const auto assignments = p.get_target_assignments({t});
-    for(const auto& [ins, target] : assignments)
+    const auto* mod = p.get_main_module();
+    EXPECT(mod->size() == assignments.size());
+    for(const auto ins : iterator_for(*mod))
     {
-        (void)ins;
-        EXPECT(target == first_target);
+        const auto& target = assignments.at(ins);
+        EXPECT(target == "fpga");
     }
 }
```
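The reworked test no longer iterates the assignment map and ignores the key; it walks the module's instructions, requires the map to cover all of them, and looks each one up with at(). A self-contained sketch of that verification pattern, with standard containers standing in for the MIGraphX types:

```cpp
#include <cassert>
#include <map>
#include <string>
#include <vector>

int main()
{
    // Illustrative stand-ins: ints for instruction handles, a map for
    // the target_assignments result.
    std::vector<int> instructions = {10, 11, 12};
    std::map<int, std::string> assignments = {{10, "fpga"}, {11, "fpga"}, {12, "fpga"}};

    // Coverage first: every instruction must have received an assignment...
    assert(instructions.size() == assignments.size());
    // ...then check each one with at(), which throws on a missing key.
    for(int ins : instructions)
        assert(assignments.at(ins) == "fpga");
}
```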
test/fuse_pointwise.cpp

Include style fix: quotes become angle brackets.

```diff
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "migraphx/dead_code_elimination.hpp"
+#include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/fuse_pointwise.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/pass_manager.hpp>
```
test/gpu/adjust_allocation.cpp

Pulls in the new precompile-op test header and swaps a lowered gpu::add for a precompile op.

```diff
@@ -40,6 +40,10 @@
 #include <migraphx/make_op.hpp>
 #include <basic_ops.hpp>
 #include <test.hpp>
+#include "make_precompile_op.hpp"
+
+// Treat some operators as compilable to enable lowering
+MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")

 void run_lowering(migraphx::program& p, bool offload_copy = false)
 {
@@ -118,7 +122,7 @@ TEST_CASE(no_copy_dead_param)
     auto xb  = mm->add_instruction(migraphx::make_op("hip::allocate", {{"shape", to_value(s)}}));
     auto gx  = mm->add_instruction(migraphx::make_op("hip::copy_to_gpu"), x, xb);
     auto ab  = mm->add_instruction(migraphx::make_op("hip::allocate", {{"shape", to_value(s)}}));
-    auto sum = mm->add_instruction(migraphx::make_op("gpu::add"), gx, gx, ab);
+    auto sum = mm->add_instruction(make_precompile_op("add"), gx, gx, ab);
     auto r   = mm->add_instruction(migraphx::make_op("hip::copy_from_gpu"), sum);
     mm->add_return({r});
```
test/gpu/jit.cpp

Two new expressions join the JIT math-compilation test list.

```diff
@@ -307,12 +307,14 @@ TEST_CASE(compile_math)
         "erf(x)",
         "exp(x)",
         "floor(x)",
+        "fmod(x, x)",
         "isnan(x)",
         "log(x)",
         "max(x, x)",
         "min(x, x)",
         "pow(x, 0)",
         "pow(x, x)",
+        "remainder(x,x)",
         "round(x)",
         "rsqrt(x)",
         "sin(x)",
```
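The two new entries exercise related but distinct math functions: fmod truncates the quotient toward zero, while remainder rounds it to the nearest integer (ties to even). A quick host-side check of the difference:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
    // Same operands, different rounding convention for the quotient.
    std::printf("%g\n", std::fmod(5.5, 2.0));      // 1.5  (5.5 - 2*2)
    std::printf("%g\n", std::remainder(5.5, 2.0)); // -0.5 (5.5 - 2*3)
}
```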
...
src/targets/gpu/device/gelu.c
pp
→
test/gpu/make_precompile_op.h
pp
View file @
3a4d36cf
...
@@ -21,63 +21,46 @@
...
@@ -21,63 +21,46 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/gpu/device/gelu.hpp>
#ifndef MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
#include <migraphx/gpu/device/nary.hpp>
#define MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
#include <migraphx/gpu/device/types.hpp>
#include <cmath>
namespace
migraphx
{
#include <migraphx/operation.hpp>
inline
namespace
MIGRAPHX_INLINE_NS
{
#include <migraphx/gpu/compiler.hpp>
namespace
gpu
{
#include <migraphx/make_op.hpp>
namespace
device
{
// x * 0.5 * (1.0 + erf(x / sqrt(2.0)))
// NOLINTNEXTLINE
template
<
class
T
>
#define MIGRAPHX_GPU_TEST_PRECOMPILE(...) \
auto
gelu_fn
(
T
x
)
__device__
struct test_compiler : migraphx::gpu::compiler<test_compiler> \
{
{ \
return
x
*
0.5
*
(
1
+
::
erf
(
x
*
M_SQRT1_2
));
std::vector<std::string> names() const { return {__VA_ARGS__}; } \
}
\
template <class... Ts> \
migraphx::operation compile_op(Ts&&...) const \
{ \
MIGRAPHX_THROW("Not compilable"); \
} \
\
template <class... Ts> \
migraphx::gpu::compiler_replace compile(Ts&&...) const \
{ \
MIGRAPHX_THROW("Not compilable"); \
} \
};
// 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))
inline
migraphx
::
operation
make_precompile_op
(
migraphx
::
rank
<
0
>
,
const
migraphx
::
operation
&
op
)
template
<
class
T
>
auto
gelu_fn_new
(
T
x
)
__device__
{
{
return
0.5
*
x
*
(
1
+
tanh
(
sqrt
(
M_2_PI
)
*
(
x
+
0.044715
*
x
*
x
*
x
))
);
return
migraphx
::
make_op
(
"gpu::precompile_op"
,
{{
"op"
,
migraphx
::
to_value
(
op
)}}
);
}
}
void
gelu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
inline
migraphx
::
operation
make_precompile_op
(
migraphx
::
rank
<
1
>
,
const
std
::
string
&
name
)
{
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
__device__
{
return
gelu_fn
(
to_hip_type
(
x
));
}
);
return
make_precompile_op
(
migraphx
::
rank
<
0
>
{},
migraphx
::
make_op
(
name
)
);
}
}
void
gelu_new
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
template
<
class
T
>
{
auto
make_precompile_op
(
const
T
&
x
)
nary
(
stream
,
result
,
arg
)([](
auto
x
)
__device__
{
return
gelu_fn_new
(
to_hip_type
(
x
));
});
}
void
add_gelu
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
nary
(
stream
,
result
,
arg1
,
arg2
)([](
auto
x
,
auto
y
)
__device__
{
auto
sum
=
to_hip_type
(
x
+
y
);
return
gelu_fn
(
sum
);
});
}
void
add_gelu_new
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg1
,
const
argument
&
arg2
)
{
{
nary
(
stream
,
result
,
arg1
,
arg2
)([](
auto
x
,
auto
y
)
__device__
{
return
make_precompile_op
(
migraphx
::
rank
<
1
>
{},
x
);
auto
sum
=
to_hip_type
(
x
+
y
);
return
gelu_fn
(
sum
);
});
}
}
}
// namespace device
#endif // MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
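The three make_precompile_op overloads use rank-based tag dispatch: a plain op name hits the rank<1> string overload, which builds the operation via migraphx::make_op(name), while an already-built operation falls through to the rank<0> overload via the derived-to-base conversion. A self-contained sketch of the idiom, independent of MIGraphX:

```cpp
#include <iostream>
#include <string>

// rank<N> derives from rank<N-1>, so a rank<1> argument prefers a rank<1>
// overload and silently decays to rank<0> when that overload is not viable.
// (Local re-implementation for illustration; MIGraphX ships its own rank.)
template <int N>
struct rank : rank<N - 1>
{
};
template <>
struct rank<0>
{
};

// Chosen when the payload is (convertible to) a string
std::string describe(rank<1>, const std::string& name) { return "by name: " + name; }

// Fallback for any other payload type
template <class T>
std::string describe(rank<0>, const T&) { return "by operation object"; }

template <class T>
std::string describe(const T& x) { return describe(rank<1>{}, x); }

int main()
{
    std::cout << describe(std::string{"add"}) << '\n'; // by name: add
    std::cout << describe(3.14) << '\n';               // by operation object
}
```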
test/gpu/mlir.cpp

Drops an unused using-declaration and a stale build hint; the expected MLIR now spells the entry point func.func, matching upstream MLIR's move of func into the func dialect.

```diff
@@ -37,10 +37,6 @@
 #include <migraphx/functional.hpp>
 #include <test.hpp>

-using migraphx::trim;
-
-// m test_gpu_mlir && ./bin/test_gpu_mlir
-
 struct mlir_gpu_target : migraphx::gpu::target
 {
     std::string name() const { return "mlir"; }
@@ -144,7 +140,7 @@ TEST_CASE(conv)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
     %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     return %0 : tensor<1x2x2x2xf32>
 }
@@ -167,7 +163,7 @@ TEST_CASE(conv_add_relu)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
     %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
```
test/gpu/pack_int8_args.cpp

Three recurring edits across the four quant_dot test cases: lowered gpu::mul/gpu::add/gpu::convert instructions become gpu::precompile_op wrappers via the new test header; gpu::quant_gemm gains a compute_fp32 attribute; and the gpu context is created once per test and threaded through run_passes and the int8_x4 query instead of the deleted local get_int8_x4_format() helper.

```diff
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "migraphx/instruction_ref.hpp"
+#include <migraphx/instruction_ref.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/target.hpp>
@@ -30,6 +30,7 @@
 #include <migraphx/adjust_allocation.hpp>
 #include <migraphx/gpu/pack_int8_args.hpp>
 #include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/auto_contiguous.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/replace_allocate.hpp>
@@ -38,10 +39,13 @@
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/make_op.hpp>
 #include <test.hpp>
+#include "make_precompile_op.hpp"

-void run_passes(migraphx::module& m)
+// Treat some operators as compilable to enable lowering
+MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")
+
+void run_passes(migraphx::module& m, migraphx::gpu::context& ctx)
 {
-    auto ctx = migraphx::gpu::context{};
     migraphx::run_passes(m,
                          {migraphx::auto_contiguous{},
                           migraphx::gpu::lowering{&ctx, false},
@@ -52,18 +56,6 @@ void run_passes(migraphx::module& m)
                           migraphx::dead_code_elimination{}});
 }

-bool get_int8_x4_format()
-{
-    bool int8_x4_format = true;
-#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
-    auto ctx = migraphx::gpu::context{};
-    rocblas_gemm_flags flag;
-    rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
-    int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
-#endif
-    return int8_x4_format;
-}
-
 TEST_CASE(quant_dot)
 {
     auto create_module = [] {
@@ -102,11 +94,13 @@ TEST_CASE(quant_dot)
                 migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m2_shape)}}));
             packa = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), l2, alloc);
         }
         auto gemm =
-            m.add_instruction(migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
+            m.add_instruction(migraphx::make_op("gpu::quant_gemm",
+                                                {{"int8_x4_format", int8_x4},
+                                                 {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
                               l1,
                               packa,
                               gemm_alloc);
         auto beta_broadcast = m.add_instruction(
             migraphx::make_op("multibroadcast", {{"out_lens", m3_shape.lens()}}), beta);
@@ -116,19 +110,19 @@ TEST_CASE(quant_dot)
             m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc);
         auto mul_alloc = m.add_instruction(
             migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m3_shape)}}));
         auto m3_beta =
-            m.add_instruction(migraphx::make_op("gpu::mul"), l3, beta_contiguous, mul_alloc);
-        auto gemm_add = m.add_instruction(migraphx::make_op("gpu::add"), gemm, m3_beta, output);
+            m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc);
+        auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output);
         m.add_return({gemm_add});
         return m;
     };

     auto m1 = create_module();
-    run_passes(m1);
-    bool flag = get_int8_x4_format();
-    auto m2 = create_optimized_int8_x4(flag);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2 = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }
@@ -187,21 +181,23 @@ TEST_CASE(quant_dot_trans)
         // back result to int8
         auto tl1_convert_alloc = m.add_instruction(migraphx::make_op(
             "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}}));
-        auto tl1_convert = m.add_instruction(
-            migraphx::make_op("gpu::convert", {{"target_type", alpha->get_shape().type()}}),
-            conta,
-            tl1_convert_alloc);
+        auto tl1_convert =
+            m.add_instruction(make_precompile_op(migraphx::make_op(
+                                  "convert", {{"target_type", alpha->get_shape().type()}})),
+                              conta,
+                              tl1_convert_alloc);
         auto mul_alloc = m.add_instruction(migraphx::make_op(
             "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}}));
-        auto tl1_alpha_int32 = m.add_instruction(
-            migraphx::make_op("gpu::mul"), alpha_contiguous, tl1_convert, mul_alloc);
+        auto tl1_alpha_int32 =
+            m.add_instruction(make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc);
         // convert mul_res to int8
         auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op(
             "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}}));
-        auto tl1_alpha_int8 = m.add_instruction(
-            migraphx::make_op("gpu::convert", {{"target_type", conta->get_shape().type()}}),
-            tl1_alpha_int32,
-            tl1_alpha_int8_alloc);
+        auto tl1_alpha_int8 =
+            m.add_instruction(make_precompile_op(migraphx::make_op(
+                                  "convert", {{"target_type", conta->get_shape().type()}})),
+                              tl1_alpha_int32,
+                              tl1_alpha_int8_alloc);
         auto packb = contb;
         if(int8_x4)
@@ -211,21 +207,24 @@ TEST_CASE(quant_dot_trans)
             packb = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), contb, allocpb);
         }
         auto gemm =
-            m.add_instruction(migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
+            m.add_instruction(migraphx::make_op("gpu::quant_gemm",
+                                                {{"int8_x4_format", int8_x4},
+                                                 {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
                               tl1_alpha_int8,
                               packb,
                               output);
         m.add_return({gemm});
         return m;
     };

     auto m1 = create_module();
-    bool flag = get_int8_x4_format();
-    auto m2 = create_optimized_int8_x4(flag);
-    run_passes(m1);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2 = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }
@@ -292,11 +291,13 @@ TEST_CASE(quant_dot_pad)
             packa = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), pl2, alloc);
         }
         auto gemm =
-            m.add_instruction(migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
+            m.add_instruction(migraphx::make_op("gpu::quant_gemm",
+                                                {{"int8_x4_format", int8_x4},
+                                                 {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
                               pl1,
                               packa,
                               gemm_alloc);
         auto beta_broadcast =
             m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s3.lens()}}), beta);
@@ -306,18 +307,18 @@ TEST_CASE(quant_dot_pad)
             m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc);
         auto mul_alloc = m.add_instruction(
             migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(s3)}}));
         auto m3_beta =
-            m.add_instruction(migraphx::make_op("gpu::mul"), l3, beta_contiguous, mul_alloc);
-        auto gemm_add = m.add_instruction(migraphx::make_op("gpu::add"), gemm, m3_beta, output);
+            m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc);
+        auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output);
         m.add_return({gemm_add});
         return m;
     };

     auto m1 = create_module();
-    bool flag = get_int8_x4_format();
-    auto m2 = create_optimized_int8_x4(flag);
-    run_passes(m1);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2 = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }
@@ -396,14 +397,15 @@ TEST_CASE(quant_dot_trans_pad)
         // back result to int8
         auto tl1_convert_alloc = m.add_instruction(migraphx::make_op(
             "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}}));
-        auto tl1_convert = m.add_instruction(
-            migraphx::make_op("gpu::convert", {{"target_type", alpha->get_shape().type()}}),
-            conta,
-            tl1_convert_alloc);
+        auto tl1_convert =
+            m.add_instruction(make_precompile_op(migraphx::make_op(
+                                  "convert", {{"target_type", alpha->get_shape().type()}})),
+                              conta,
+                              tl1_convert_alloc);
         auto mul_alloc = m.add_instruction(migraphx::make_op(
             "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}}));
-        auto tl1_alpha_int32 = m.add_instruction(
-            migraphx::make_op("gpu::mul"), alpha_contiguous, tl1_convert, mul_alloc);
+        auto tl1_alpha_int32 =
+            m.add_instruction(make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc);
         // convert mul_res to int8
         auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op(
             "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}}));
@@ -415,10 +417,11 @@ TEST_CASE(quant_dot_trans_pad)
                 migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps1)}}));
         }
-        auto tl1_alpha_int8 = m.add_instruction(
-            migraphx::make_op("gpu::convert", {{"target_type", conta->get_shape().type()}}),
-            tl1_alpha_int32,
-            tl1_alpha_int8_alloc);
+        auto tl1_alpha_int8 =
+            m.add_instruction(make_precompile_op(migraphx::make_op(
+                                  "convert", {{"target_type", conta->get_shape().type()}})),
+                              tl1_alpha_int32,
+                              tl1_alpha_int8_alloc);
         auto pa = tl1_alpha_int8;
         if(int8_x4)
@@ -438,17 +441,23 @@ TEST_CASE(quant_dot_trans_pad)
         }
         auto gemm = m.add_instruction(
-            migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
+            migraphx::make_op("gpu::quant_gemm",
+                              {{"int8_x4_format", int8_x4},
+                               {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
             pa,
             packb,
             output);
         m.add_return({gemm});
         return m;
     };

     auto m1 = create_module();
-    bool flag = get_int8_x4_format();
-    auto m2 = create_optimized_int8_x4(flag);
-    run_passes(m1);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2 = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }
```
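The plumbing change is the load-bearing one: previously run_passes and the local get_int8_x4_format() each default-constructed their own context, so the pass pipeline and the capability query could in principle consult different device state. Threading one context through both removes that possibility. A toy sketch of the dependency-injection shape (all names here are illustrative stand-ins, not MIGraphX APIs):

```cpp
#include <cassert>

// Illustrative stand-in for migraphx::gpu::context
struct context
{
    bool int8_x4_format = false;
};

// The pass pipeline consults the same context the test later queries
void run_passes(bool& module_uses_packing, context& ctx)
{
    module_uses_packing = ctx.int8_x4_format;
}

bool get_int8_x4_format(const context& ctx) { return ctx.int8_x4_format; }

int main()
{
    context ctx{true};
    bool module_uses_packing = false;
    run_passes(module_uses_packing, ctx);
    // Pipeline and query agree because they share one context.
    assert(module_uses_packing == get_int8_x4_format(ctx));
}
```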
test/include/basic_ops.hpp

mod_pass_op's guards switch to `not`, and get_2x2 takes an optional base offset.

```diff
@@ -112,12 +112,12 @@ struct mod_pass_op
     migraphx::shape compute_shape(std::vector<migraphx::shape> inputs,
                                   std::vector<migraphx::module_ref> mods) const
     {
-        if(!mods.empty())
+        if(not mods.empty())
         {
             auto out_shapes = mods[0]->get_output_shapes();
             return out_shapes[0];
         }
-        if(!inputs.empty())
+        if(not inputs.empty())
         {
             return inputs.front();
         }
@@ -186,9 +186,10 @@ struct nop
     migraphx::shape compute_shape(const std::vector<migraphx::shape>&) const { return {}; }
 };

-inline migraphx::literal get_2x2()
+inline migraphx::literal get_2x2(int base = 0)
 {
-    return migraphx::literal{{migraphx::shape::float_type, {2, 2}}, {1, 2, 3, 4}};
+    return migraphx::literal{{migraphx::shape::float_type, {2, 2}},
+                             {base + 1, base + 2, base + 3, base + 4}};
 }

 inline migraphx::literal get_2x2_transposed()
```
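The default argument keeps every existing get_2x2() call site returning the old {1, 2, 3, 4} values while letting new tests request a distinct constant. The same pattern in miniature, with std::array standing in for migraphx::literal:

```cpp
#include <array>
#include <cassert>

// Default base preserves old behavior; a nonzero base yields fresh values.
inline std::array<float, 4> get_2x2(int base = 0)
{
    return {float(base + 1), float(base + 2), float(base + 3), float(base + 4)};
}

int main()
{
    assert(get_2x2() == (std::array<float, 4>{1, 2, 3, 4}));
    assert(get_2x2(10) == (std::array<float, 4>{11, 12, 13, 14}));
}
```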
test/include/test.hpp

stream_range is now only declared up front and defined after the operator<< overloads; failed() switches to `not`.

```diff
@@ -108,15 +108,7 @@ struct function
 };

 template <class Stream, class Iterator>
-inline Stream& stream_range(Stream& s, Iterator start, Iterator last)
-{
-    if(start != last)
-    {
-        s << *start;
-        std::for_each(std::next(start), last, [&](auto&& x) { s << ", " << x; });
-    }
-    return s;
-}
+Stream& stream_range(Stream& s, Iterator start, Iterator last);

 template <class Stream>
 inline Stream& operator<<(Stream& s, std::nullptr_t)
@@ -136,6 +128,17 @@ inline auto operator<<(Stream& s, const Range& v) -> decltype(stream_range(s, v.
     return s;
 }

+template <class Stream, class Iterator>
+inline Stream& stream_range(Stream& s, Iterator start, Iterator last)
+{
+    if(start != last)
+    {
+        s << *start;
+        std::for_each(std::next(start), last, [&](auto&& x) { s << ", " << x; });
+    }
+    return s;
+}
+
 template <class T>
 const T& get_value(const T& x)
 {
@@ -342,7 +345,7 @@ inline std::ostream& operator<<(std::ostream& os, const color& c)
 template <class T, class F>
 void failed(T x, const char* msg, const char* func, const char* file, int line, F f)
 {
-    if(!bool(x.value()))
+    if(not bool(x.value()))
     {
         std::cout << func << std::endl;
         std::cout << file << ":" << line << ":" << std::endl;
```
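Moving the definition below the operator<< overloads means those overloads are already visible when stream_range's body prints elements, presumably so that elements which are themselves ranges (or nullptr) route back through the harness's custom operators. A self-contained sketch of the declare-then-define pattern (our names, not the harness's):

```cpp
#include <iostream>
#include <vector>

// Declaration only, so operator<< below can be written in terms of it.
template <class Stream, class Iterator>
Stream& stream_range(Stream& s, Iterator start, Iterator last);

// Range printer that delegates to stream_range.
template <class Stream, class T>
Stream& operator<<(Stream& s, const std::vector<T>& v)
{
    return stream_range(s, v.begin(), v.end());
}

// Definition after the overloads: when *it is itself a vector, ordinary
// lookup at this point already sees the operator<< above.
template <class Stream, class Iterator>
Stream& stream_range(Stream& s, Iterator start, Iterator last)
{
    for(auto it = start; it != last; ++it)
        s << (it == start ? "" : ", ") << *it;
    return s;
}

int main()
{
    std::vector<std::vector<int>> m = {{1, 2}, {3}};
    std::cout << m << '\n'; // prints: 1, 2, 3
}
```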