Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
0662a9a3
Unverified
Commit
0662a9a3
authored
Nov 09, 2023
by
Brian Pickrell
Committed by
GitHub
Nov 09, 2023
Browse files
Merge branch 'develop' into dyn_resize_gather
parents
b74d3a8f
35e5298e
Changes
130
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
804 additions
and
439 deletions
+804
-439
src/onnx/parse_multinomial.cpp
src/onnx/parse_multinomial.cpp
+74
-16
src/onnx/parse_resize.cpp
src/onnx/parse_resize.cpp
+71
-3
src/onnx/parse_slice.cpp
src/onnx/parse_slice.cpp
+7
-5
src/onnx/parse_split.cpp
src/onnx/parse_split.cpp
+26
-5
src/program.cpp
src/program.cpp
+1
-1
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+5
-2
src/quantization.cpp
src/quantization.cpp
+1
-1
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+1
-1
src/simplify_dyn_ops.cpp
src/simplify_dyn_ops.cpp
+46
-2
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+2
-2
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+24
-22
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+11
-8
src/targets/gpu/compile_miopen.cpp
src/targets/gpu/compile_miopen.cpp
+4
-15
src/targets/gpu/compile_ops.cpp
src/targets/gpu/compile_ops.cpp
+10
-7
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+0
-97
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+474
-157
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+9
-31
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
...argets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+0
-49
src/targets/gpu/include/migraphx/gpu/gemm.hpp
src/targets/gpu/include/migraphx/gpu/gemm.hpp
+37
-14
No files found.
src/onnx/parse_multinomial.cpp
View file @
0662a9a3
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
...
@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
const
onnx_parser
::
node_info
&
info
,
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
std
::
vector
<
instruction_ref
>
args
)
const
{
{
if
(
args
.
empty
())
MIGRAPHX_THROW
(
"PARSE_MULTINOMIAL: no arguments given"
);
int
dtype
=
6
;
int
dtype
=
6
;
if
(
contains
(
info
.
attributes
,
"dtype"
))
if
(
contains
(
info
.
attributes
,
"dtype"
))
dtype
=
info
.
attributes
.
at
(
"dtype"
).
i
();
dtype
=
info
.
attributes
.
at
(
"dtype"
).
i
();
...
@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
...
@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
size_t
sample_size
=
1
;
size_t
sample_size
=
1
;
if
(
contains
(
info
.
attributes
,
"sample_size"
))
if
(
contains
(
info
.
attributes
,
"sample_size"
))
sample_size
=
info
.
attributes
.
at
(
"sample_size"
).
i
();
sample_size
=
info
.
attributes
.
at
(
"sample_size"
).
i
();
else
MIGRAPHX_THROW
(
"PARSE_MULTINOMIAL: sample_size not given"
);
// Use logarithmic math to scale probabilities while avoiding division by very
// small numbers. Scaling by the maximum makes very tiny ranges more
// tractable; any constant factor gives an equivalent distribution, since the Multinomial op
// normalizes at runtime.
// Subtract the per-batch maximum log-probability, making the per-batch max 0
// Subtract the per-batch maximum log-probability, making the per-batch max 0
auto
maxes
=
auto
maxes
=
info
.
add_instruction
(
migraphx
::
make_op
(
"reduce_max"
,
{{
"axes"
,
{
1
}}}),
args
[
0
]);
info
.
add_instruction
(
migraphx
::
make_op
(
"reduce_max"
,
{{
"axes"
,
{
1
}}}),
args
[
0
]);
auto
mb_maxes
=
info
.
add_instruction
(
auto
cdf
=
info
.
add_common_op
(
"sub"
,
args
[
0
],
maxes
);
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
args
[
0
]
->
get_shape
().
lens
()}}),
maxes
);
auto
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"sub"
),
args
[
0
],
mb_maxes
);
// Take the element-wise exponent to get probabilities in the range (0, 1]
// Take the element-wise exponent to get probabilities in the range (0, 1]
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"exp"
),
cdf
);
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"exp"
),
cdf
);
// Compute the cumulative d
ensity
function
// Compute the cumulative d
istribution
function
cdf
=
info
.
add_instruction
(
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"prefix_scan_sum"
,
{{
"axis"
,
1
},
{
"exclusive"
,
false
}}),
cdf
);
migraphx
::
make_op
(
"prefix_scan_sum"
,
{{
"axis"
,
1
},
{
"exclusive"
,
false
}}),
cdf
);
// Pre-compute random distribution
instruction_ref
seed_input
;
std
::
mt19937
gen
(
std
::
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
().
count
());
if
(
contains
(
info
.
attributes
,
"seed"
))
if
(
contains
(
info
.
attributes
,
"seed"
))
gen
.
seed
(
info
.
attributes
.
at
(
"seed"
).
f
());
{
float
seed
=
info
.
attributes
.
at
(
"seed"
).
f
();
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
1
}};
std
::
vector
<
float
>
data
=
{
seed
};
seed_input
=
info
.
add_literal
(
migraphx
::
literal
(
s
,
data
));
}
else
{
seed_input
=
info
.
add_instruction
(
migraphx
::
make_op
(
"random_seed"
));
}
instruction_ref
randoms
;
shape
s0
=
args
[
0
]
->
get_shape
();
if
(
s0
.
dynamic
())
{
// Dynamic batch_size will be taken from args[0]. The input argument to this should
// have a second dimension of sample_size.
std
::
vector
<
shape
::
dynamic_dimension
>
dyn_dim_set
;
dyn_dim_set
.
emplace_back
(
s0
.
dyn_dims
().
front
());
dyn_dim_set
.
emplace_back
(
shape
::
dynamic_dimension
{
sample_size
,
sample_size
});
// read the input dimensions
auto
dim_of
=
info
.
add_instruction
(
migraphx
::
make_op
(
"dimensions_of"
,
{{
"end"
,
2
}}),
args
[
0
]);
// The next two operations insert the value sample_size into the second array position
// make an argument of (1, 0)
shape
s
(
shape
::
int64_type
,
{
2
});
std
::
vector
<
int64_t
>
data1
{
1
,
0
};
auto
l1
=
info
.
add_literal
(
s
,
data1
);
auto
batch_arg
=
info
.
add_instruction
(
migraphx
::
make_op
(
"mul"
),
dim_of
,
l1
);
std
::
vector
<
int64_t
>
data2
(
2
,
0
);
// make an argument of (0, sample_size)
data2
[
1
]
=
sample_size
;
auto
l2
=
info
.
add_literal
(
s
,
data2
);
auto
alloc_shape
=
info
.
add_instruction
(
migraphx
::
make_op
(
"add"
),
batch_arg
,
l2
);
// alloc_shape should contain the input-based shape dimensions as its values at runtime,
// and its own shape is {2}
std
::
uniform_real_distribution
<>
dis
(
0.0
,
1.0
);
// compile_shape is the shape used when compiling the Allocate op, and may be dynamic
size_t
batch_size
=
args
[
0
]
->
get_shape
().
lens
().
front
();
migraphx
::
shape
compile_shape
=
migraphx
::
shape
dist_shape
{
migraphx
::
shape
::
float_type
,
{
batch
_size
,
sample_size
}};
migraphx
::
shape
(
s0
.
type
(),
{
s0
.
dyn_dims
().
front
(),
{
sample
_size
,
sample_size
}}
)
;
std
::
vector
<
float
>
random_dist
(
batch_size
*
sample_size
);
// Allocate on-device storage for the random values
std
::
generate
(
random_dist
.
begin
(),
random_dist
.
end
(),
[
&
]()
{
return
dis
(
gen
);
});
auto
alloc
=
info
.
add_instruction
(
auto
dist_lit
=
info
.
add_literal
(
migraphx
::
literal
{
dist_shape
,
random_dist
});
migraphx
::
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
compile_shape
)}}),
alloc_shape
);
randoms
=
info
.
add_instruction
(
migraphx
::
make_op
(
"random_uniform"
),
seed_input
,
alloc
);
}
else
{
// use literal. The array populated by random_uniform may have any shape, as long as its
// number of elements is batch_size * sample_size.
size_t
batch_size
=
s0
.
lens
().
front
();
auto
rand_dummy
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
::
float_type
,
{
batch_size
*
sample_size
}});
randoms
=
info
.
add_instruction
(
migraphx
::
make_op
(
"random_uniform"
),
seed_input
,
rand_dummy
);
}
return
info
.
add_instruction
(
return
info
.
add_instruction
(
migraphx
::
make_op
(
"multinomial"
,
{{
"dtype"
,
output_type
}}),
cdf
,
dist_lit
);
migraphx
::
make_op
(
"multinomial"
,
{{
"dtype"
,
output_type
}}),
cdf
,
randoms
);
}
}
};
};
...
...
src/onnx/parse_resize.cpp
View file @
0662a9a3
...
@@ -181,6 +181,76 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
...
@@ -181,6 +181,76 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
return
nearest_mode
;
return
nearest_mode
;
}
}
/// Read the optional "scales" attribute from a node's attribute map.
///
/// @param attr attribute map of the node being parsed
/// @return the attribute's values copied into a vector of double, or an empty
///         vector when the attribute is not present
static std::vector<double> get_scales(const onnx_parser::attribute_map& attr)
{
    std::vector<double> result;
    // Nothing to read: callers get an empty vector and may fill it from the inputs instead.
    if(not contains(attr, "scales"))
        return result;

    copy(attr.at("scales").floats(), std::back_inserter(result));
    return result;
}
/**
 * Walk the node inputs and derive either the output dimensions or the scale
 * factors from them, depending on the element type of each input.
 *
 * - An int64 input is evaluated as the requested output size: `out_lens` is
 *   overwritten with it and `vec_scale` is recomputed as out / in per axis.
 * - Any other input whose first dimension equals the input rank is evaluated
 *   as the scale vector and overwrites `vec_scale`.
 *
 * @param args      all inputs of the node; `args.front()` is never inspected
 * @param in_lens   static dimensions of the tensor being resized
 * @param op_name   operator name, used only to build error messages
 * @param vec_scale in/out: scale factors, left untouched if no input supplies them
 * @param out_lens  in/out: output dimensions, left untouched if no input supplies them
 *
 * Throws (via check_arg_empty / MIGRAPHX_THROW) when an input cannot be
 * evaluated at parse time or when the output size rank differs from the input rank.
 */
static void parse_args(const std::vector<instruction_ref>& args,
                       const std::vector<size_t>& in_lens,
                       const std::string& op_name,
                       std::vector<double>& vec_scale,
                       std::vector<std::size_t>& out_lens)
{
    const std::string msg_prefix = "PARSE_" + op_name;

    for(const auto& input : args)
    {
        // Skip "undefined" placeholders (presumably omitted optional inputs) ...
        if(input->name() == "undefined")
            continue;
        // ... and the first argument, which is not a size/scale input.
        if(input == args.front())
            continue;

        // Inputs with an empty shape carry no information.
        const auto input_lens = input->get_shape().lens();
        if(input_lens.empty())
            continue;

        if(input->get_shape().type() == shape::int64_type)
        {
            // int64 input: explicit output size
            auto sizes_arg = input->eval();
            check_arg_empty(sizes_arg, msg_prefix + ": dynamic output size is not supported!");
            sizes_arg.visit(
                [&](const auto& sizes) { out_lens.assign(sizes.begin(), sizes.end()); });

            if(out_lens.size() != in_lens.size())
            {
                MIGRAPHX_THROW(msg_prefix + ": specified output size does not match input size");
            }

            // Derive the per-axis scale from the requested output size.
            vec_scale.resize(in_lens.size());
            std::transform(in_lens.begin(),
                           in_lens.end(),
                           out_lens.begin(),
                           vec_scale.begin(),
                           [](auto in_len, auto out_len) { return 1.0 * out_len / in_len; });
            continue;
        }

        // Non-int64 input: treat it as the scale vector only if it has one entry per axis.
        if(input_lens[0] != in_lens.size())
            continue;

        auto scales_arg = input->eval();
        check_arg_empty(scales_arg, msg_prefix + ": dynamic input scale is not supported!");
        scales_arg.visit(
            [&](const auto& scales) { vec_scale.assign(scales.begin(), scales.end()); });
    }
}
struct
parse_resize
:
op_parser
<
parse_resize
>
struct
parse_resize
:
op_parser
<
parse_resize
>
{
{
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"Resize"
},
{
"Upsample"
}};
}
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"Resize"
},
{
"Upsample"
}};
}
...
@@ -309,7 +379,7 @@ struct parse_resize : op_parser<parse_resize>
...
@@ -309,7 +379,7 @@ struct parse_resize : op_parser<parse_resize>
std
::
vector
<
size_t
>
out_lens
(
in_s
.
ndim
());
std
::
vector
<
size_t
>
out_lens
(
in_s
.
ndim
());
// scale
// scale
std
::
vector
<
double
>
vec_scale
;
std
::
vector
<
double
>
vec_scale
=
get_scales
(
info
.
attributes
)
;
// Look at inputs and infer either output size or scale, depending on input type
// Look at inputs and infer either output size or scale, depending on input type
for
(
const
auto
&
arg
:
args
)
for
(
const
auto
&
arg
:
args
)
...
@@ -381,8 +451,6 @@ struct parse_resize : op_parser<parse_resize>
...
@@ -381,8 +451,6 @@ struct parse_resize : op_parser<parse_resize>
}
}
}
}
// Dynamic batch: Only args[0] can have a dynamic shape, only the 0'th
// dimension--batch size--can be non-fixed, and the only resize mode allowed is "nearest"
if
(
args
[
0
]
->
get_shape
().
dynamic
())
if
(
args
[
0
]
->
get_shape
().
dynamic
())
{
{
return
dynamic_nearest_parse
(
out_lens
,
vec_scale
,
opd
,
info
,
args
);
return
dynamic_nearest_parse
(
out_lens
,
vec_scale
,
opd
,
info
,
args
);
...
...
src/onnx/parse_slice.cpp
View file @
0662a9a3
...
@@ -46,6 +46,9 @@ struct parse_slice : op_parser<parse_slice>
...
@@ -46,6 +46,9 @@ struct parse_slice : op_parser<parse_slice>
void
always_insert
(
instruction_ref
arg
)
{
op_args
.
insert
(
op_args
.
begin
(),
arg
);
}
void
always_insert
(
instruction_ref
arg
)
{
op_args
.
insert
(
op_args
.
begin
(),
arg
);
}
/**
* Either insert argument into `this->op_args` or return the constant value of the argument
*/
std
::
vector
<
int64_t
>
insert
(
instruction_ref
arg
)
std
::
vector
<
int64_t
>
insert
(
instruction_ref
arg
)
{
{
std
::
vector
<
int64_t
>
result
;
std
::
vector
<
int64_t
>
result
;
...
@@ -137,23 +140,22 @@ struct parse_slice : op_parser<parse_slice>
...
@@ -137,23 +140,22 @@ struct parse_slice : op_parser<parse_slice>
sd
.
always_insert
(
args
.
at
(
0
));
sd
.
always_insert
(
args
.
at
(
0
));
// If axes arg is not given, the default is all of them.
// If axes arg is not given, the default is all of them.
if
(
sd
.
op
.
axes
.
empty
()
and
sd
.
op_args
.
size
()
<
3
)
if
(
sd
.
op
.
axes
.
empty
()
and
sd
.
op_args
.
size
()
<
=
3
)
{
{
std
::
vector
<
int64_t
>
axes
(
args
[
0
]
->
get_shape
().
ndim
());
std
::
vector
<
int64_t
>
axes
(
args
[
0
]
->
get_shape
().
ndim
());
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
int64_t
{
0
});
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
int64_t
{
0
});
sd
.
op
.
axes
=
axes
;
sd
.
op
.
axes
=
axes
;
}
}
if
(
not
sd
.
steps
.
empty
(
))
if
(
std
::
any_of
(
sd
.
steps
.
begin
(),
sd
.
steps
.
end
(),
[](
auto
s
)
{
return
s
!=
1
;
}
))
{
{
if
(
sd
.
op
.
starts
.
empty
()
or
sd
.
op
.
ends
.
empty
())
if
(
sd
.
op
.
starts
.
empty
()
or
sd
.
op
.
ends
.
empty
())
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable starts and ends is not supported"
);
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable starts and/or ends is not supported"
);
if
(
sd
.
op
.
axes
.
empty
())
if
(
sd
.
op
.
axes
.
empty
())
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable axes is not supported"
);
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable axes is not supported"
);
}
}
assert
(
sd
.
steps
.
empty
()
or
sd
.
steps
.
size
()
==
sd
.
op
.
axes
.
size
());
// If any axes have negative step, prepare to add a "reverse" op
// If any axes have negative step, prepare to add a "reverse" op
for
(
auto
i
:
range
(
sd
.
steps
.
size
()))
for
(
auto
i
:
range
(
sd
.
steps
.
size
()))
{
{
...
...
src/onnx/parse_split.cpp
View file @
0662a9a3
...
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
...
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
// no split attribute, input is equally divided
// no split attribute, input is equally divided
else
else
{
{
if
((
lens
[
tuned_axis
]
%
info
.
num_outputs
)
!=
0
)
std
::
size_t
num_outputs
=
info
.
num_outputs
;
// the num_outputs attribute seems to be redundant since we already have
// node_info::num_outputs, but we can still perform an error check
if
(
contains
(
info
.
attributes
,
"num_outputs"
))
{
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: input cannot be equally divided into "
+
num_outputs
=
std
::
to_string
(
info
.
num_outputs
)
+
" splits!"
);
parser
.
parse_value
(
info
.
attributes
.
at
(
"num_outputs"
)).
at
<
std
::
size_t
>
();
if
(
num_outputs
!=
info
.
num_outputs
)
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: num_outputs attribute "
+
std
::
to_string
(
num_outputs
)
+
" doesn't match actual number of outputs "
+
std
::
to_string
(
info
.
num_outputs
)
+
"!"
);
}
}
if
(
lens
[
tuned_axis
]
%
num_outputs
==
0
)
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
;
vec_splits
.
resize
(
num_outputs
,
chunk_size
);
}
else
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
+
1
;
std
::
size_t
last_chunk_size
=
lens
[
tuned_axis
]
-
chunk_size
*
(
num_outputs
-
1
);
vec_splits
.
resize
(
num_outputs
-
1
,
chunk_size
);
vec_splits
.
push_back
(
last_chunk_size
);
}
}
auto
dl
=
lens
[
tuned_axis
]
/
info
.
num_outputs
;
vec_splits
.
resize
(
info
.
num_outputs
,
dl
);
}
}
if
(
std
::
accumulate
(
vec_splits
.
begin
(),
vec_splits
.
end
(),
int64_t
(
0
))
!=
if
(
std
::
accumulate
(
vec_splits
.
begin
(),
vec_splits
.
end
(),
int64_t
(
0
))
!=
...
...
src/program.cpp
View file @
0662a9a3
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
os
<<
std
::
endl
;
os
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"inferences/sec"
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"
inferences/sec"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
...
...
src/py/migraphx_py.cpp
View file @
0662a9a3
...
@@ -472,7 +472,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
...
@@ -472,7 +472,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
map_dyn_input_dims
,
map_dyn_input_dims
,
bool
skip_unknown_operators
,
bool
skip_unknown_operators
,
bool
print_program_on_error
,
bool
print_program_on_error
,
int64_t
max_loop_iterations
)
{
int64_t
max_loop_iterations
,
int64_t
limit_max_iterations
)
{
migraphx
::
onnx_options
options
;
migraphx
::
onnx_options
options
;
options
.
default_dim_value
=
default_dim_value
;
options
.
default_dim_value
=
default_dim_value
;
options
.
default_dyn_dim_value
=
default_dyn_dim_value
;
options
.
default_dyn_dim_value
=
default_dyn_dim_value
;
...
@@ -481,6 +482,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
...
@@ -481,6 +482,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
options
.
skip_unknown_operators
=
skip_unknown_operators
;
options
.
skip_unknown_operators
=
skip_unknown_operators
;
options
.
print_program_on_error
=
print_program_on_error
;
options
.
print_program_on_error
=
print_program_on_error
;
options
.
max_loop_iterations
=
max_loop_iterations
;
options
.
max_loop_iterations
=
max_loop_iterations
;
options
.
limit_max_iterations
=
limit_max_iterations
;
return
migraphx
::
parse_onnx
(
filename
,
options
);
return
migraphx
::
parse_onnx
(
filename
,
options
);
},
},
"Parse onnx file"
,
"Parse onnx file"
,
...
@@ -492,7 +494,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
...
@@ -492,7 +494,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
migraphx
::
shape
::
dynamic_dimension
>>
(),
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
migraphx
::
shape
::
dynamic_dimension
>>
(),
py
::
arg
(
"skip_unknown_operators"
)
=
false
,
py
::
arg
(
"skip_unknown_operators"
)
=
false
,
py
::
arg
(
"print_program_on_error"
)
=
false
,
py
::
arg
(
"print_program_on_error"
)
=
false
,
py
::
arg
(
"max_loop_iterations"
)
=
10
);
py
::
arg
(
"max_loop_iterations"
)
=
10
,
py
::
arg
(
"limit_max_iterations"
)
=
std
::
numeric_limits
<
uint16_t
>::
max
());
m
.
def
(
m
.
def
(
"parse_onnx_buffer"
,
"parse_onnx_buffer"
,
...
...
src/quantization.cpp
View file @
0662a9a3
...
@@ -147,8 +147,8 @@ void quantize_int8(program& prog,
...
@@ -147,8 +147,8 @@ void quantize_int8(program& prog,
run_passes
(
prog
,
run_passes
(
prog
,
{
quantize_int8_pass
{
ins_names
,
*
int8_quant_params
},
{
quantize_int8_pass
{
ins_names
,
*
int8_quant_params
},
optimize_module
{},
simplify_qdq
{},
simplify_qdq
{},
optimize_module
{},
dead_code_elimination
{}});
dead_code_elimination
{}});
}
}
...
...
src/rewrite_quantization.cpp
View file @
0662a9a3
...
@@ -47,7 +47,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -47,7 +47,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
x
);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
x
);
}
}
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
round
"
),
div
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
nearbyint
"
),
div
);
if
(
ins
->
inputs
().
size
()
==
3
)
if
(
ins
->
inputs
().
size
()
==
3
)
{
{
...
...
src/simplify_dyn_ops.cpp
View file @
0662a9a3
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/literal.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -131,10 +132,53 @@ struct find_const_4in_slice
...
@@ -131,10 +132,53 @@ struct find_const_4in_slice
}
}
};
};
/**
 * Simplify dimensions_of to a literal when the input argument has a static shape
 * or the dynamic dimensions from `start` to `end` are fixed.
 */
struct find_static_dimensions_of
{
    /// Matches every instruction named "dimensions_of".
    auto matcher() const { return match::name("dimensions_of")(); }

    /**
     * Replace the matched dimensions_of instruction with an int64 literal
     * holding the input dimensions in the range [start, end).
     * Leaves the instruction untouched when any dynamic dimension in that
     * range is not fixed.
     */
    void apply(module& m, const match::matcher_result& mr) const
    {
        auto ins                 = mr.result;
        auto input               = ins->inputs().at(0);
        auto dimensions_of_value = ins->get_operator().to_value();
        auto start               = dimensions_of_value.at("start").to<std::size_t>();
        auto end                 = dimensions_of_value.at("end").to<std::size_t>();
        if(input->get_shape().dynamic())
        {
            // check if dynamic dimensions from start to end are fixed
            auto dds = input->get_shape().dyn_dims();
            if(std::any_of(dds.begin() + start, dds.begin() + end, [](auto dd) {
                   return not dd.is_fixed();
               }))
            {
                return;
            }
        }
        const std::size_t output_ndim = end - start;
        std::vector<int64_t> vec_shape(output_ndim);
        // At this point every dimension in [start, end) is fixed (or the shape is static),
        // so the value passed to to_static() should not affect the lens that are read.
        // NOTE(review): confirm to_static() semantics for fixed dynamic dimensions.
        std::vector<std::size_t> input_lens = input->get_shape().to_static(1).lens();
        std::transform(input_lens.begin() + start,
                       input_lens.begin() + end,
                       vec_shape.begin(),
                       [](auto i) { return int64_t(i); });
        // One-dimensional int64 literal with one entry per selected dimension.
        migraphx::shape output_shape{migraphx::shape::int64_type, {output_ndim}};
        auto lit_ins = m.add_literal(migraphx::literal{output_shape, vec_shape});
        m.replace_instruction(ins, lit_ins);
    }
};
void
simplify_dyn_ops
::
apply
(
module
&
m
)
const
void
simplify_dyn_ops
::
apply
(
module
&
m
)
const
{
{
match
::
find_matches
(
match
::
find_matches
(
m
,
m
,
find_static_2in_broadcasts
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
find_static_2in_broadcasts
{},
find_static_dimensions_of
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/simplify_reshapes.cpp
View file @
0662a9a3
src/targets/gpu/CMakeLists.txt
View file @
0662a9a3
# ####################################################################################
# ####################################################################################
# The MIT License (MIT)
# The MIT License (MIT)
#
#
# Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# of this software and associated documentation files (the "Software"), to deal
...
@@ -37,8 +37,7 @@ if(NOT TARGET MIOpen)
...
@@ -37,8 +37,7 @@ if(NOT TARGET MIOpen)
message
(
SEND_ERROR
"Cant find miopen"
)
message
(
SEND_ERROR
"Cant find miopen"
)
endif
()
endif
()
if
(
NOT WIN32
)
if
(
MIGRAPHX_USE_COMPOSABLEKERNEL
)
# TODO: re-enable when CK is ported to Windows
find_package
(
composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library
)
find_package
(
composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library
)
endif
()
endif
()
...
@@ -52,10 +51,10 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
...
@@ -52,10 +51,10 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/*.hpp
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/*.hpp
)
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
if
(
WIN32
)
if
(
NOT MIGRAPHX_USE_COMPOSABLEKERNEL
)
# TODO: re-enable when CK is ported to Windows
list
(
REMOVE_ITEM KERNEL_FILES
list
(
REMOVE_ITEM KERNEL_FILES
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck_gemm.hpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck_gemm.hpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck.hpp
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck.hpp
)
endif
()
endif
()
...
@@ -103,9 +102,10 @@ rocm_clang_tidy_check(kernel_file_check)
...
@@ -103,9 +102,10 @@ rocm_clang_tidy_check(kernel_file_check)
file
(
GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/*.cpp
)
file
(
GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/*.cpp
)
if
(
WIN32
)
if
(
NOT MIGRAPHX_USE_COMPOSABLEKERNEL
)
# TODO: re-enable when CK is ported to Windows
list
(
REMOVE_ITEM JIT_GPU_SRCS
list
(
REMOVE_ITEM JIT_GPU_SRCS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm.cpp
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm_softmax_gemm.cpp
)
endif
()
endif
()
add_library
(
migraphx_gpu
add_library
(
migraphx_gpu
...
@@ -128,8 +128,6 @@ add_library(migraphx_gpu
...
@@ -128,8 +128,6 @@ add_library(migraphx_gpu
gather.cpp
gather.cpp
gemm_impl.cpp
gemm_impl.cpp
hip.cpp
hip.cpp
int8_conv_pack.cpp
int8_gemm_pack.cpp
kernel.cpp
kernel.cpp
lowering.cpp
lowering.cpp
logsoftmax.cpp
logsoftmax.cpp
...
@@ -140,7 +138,6 @@ add_library(migraphx_gpu
...
@@ -140,7 +138,6 @@ add_library(migraphx_gpu
no_device.cpp
no_device.cpp
nonzero.cpp
nonzero.cpp
pack_args.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
prefuse_ops.cpp
pad.cpp
pad.cpp
perfdb.cpp
perfdb.cpp
...
@@ -184,7 +181,6 @@ register_migraphx_gpu_ops(hip_
...
@@ -184,7 +181,6 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops
(
miopen_
register_migraphx_gpu_ops
(
miopen_
abs
abs
contiguous
contiguous
int8_conv_pack
lrn
lrn
pooling
pooling
)
)
...
@@ -192,10 +188,6 @@ register_op(migraphx_gpu
...
@@ -192,10 +188,6 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
register_op
(
migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
...
@@ -239,24 +231,28 @@ else()
...
@@ -239,24 +231,28 @@ else()
string
(
REGEX REPLACE
" /[^ ]+
\\
.(a|so) "
" "
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
string
(
REGEX REPLACE
" /[^ ]+
\\
.(a|so) "
" "
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
endforeach
()
endforeach
()
message
(
STATUS
"Hip compiler flags:
${
HIP_COMPILER_FLAGS
}
"
)
message
(
STATUS
"Hip compiler flags:
\"
${
HIP_COMPILER_FLAGS
}
\"
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE
target_compile_definitions
(
migraphx_gpu PRIVATE
"
-DMIGRAPHX_HIP_COMPILER=
${
CMAKE_CXX_COMPILER
}
"
-DMIGRAPHX_HIP_COMPILER=
"
${
CMAKE_CXX_COMPILER
}
"
"
-DMIGRAPHX_HIP_COMPILER_FLAGS=
${
HIP_COMPILER_FLAGS
}
"
-DMIGRAPHX_HIP_COMPILER_FLAGS=
"
${
HIP_COMPILER_FLAGS
}
"
)
)
if
(
DEFINED CMAKE_CXX_COMPILER_LAUNCHER
)
if
(
DEFINED CMAKE_CXX_COMPILER_LAUNCHER
)
execute_process
(
COMMAND which
${
CMAKE_CXX_COMPILER_LAUNCHER
}
OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER
)
execute_process
(
COMMAND which
${
CMAKE_CXX_COMPILER_LAUNCHER
}
OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER
)
string
(
STRIP
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
string
(
STRIP
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"
-DMIGRAPHX_HIP_COMPILER_LAUNCHER=
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE -DMIGRAPHX_HIP_COMPILER_LAUNCHER=
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
)
endif
()
endif
()
endif
()
endif
()
# Check miopen find mode api
# Check miopen find mode api
include
(
CheckLibraryExists
)
include
(
CheckLibraryExists
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
get_target_property
(
ROCBLAS_LOCATION roc::rocblas LOCATION
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
# Beta API for automated GEMM tuning
check_library_exists
(
roc::rocblas
"rocblas_gemm_ex_get_solutions"
"
${
ROCBLAS_LOCATION
}
"
HAS_ROCBLAS_TUNING_BETA_FEATURE_API
)
set
(
MIGRAPHX_USE_FIND_2_API
"
${
HAS_FIND_2_API
}
"
CACHE BOOL
""
)
set
(
MIGRAPHX_USE_FIND_2_API
"
${
HAS_FIND_2_API
}
"
CACHE BOOL
""
)
...
@@ -279,10 +275,16 @@ else()
...
@@ -279,10 +275,16 @@ else()
message
(
STATUS
"MIOpen does not have find mode api"
)
message
(
STATUS
"MIOpen does not have find mode api"
)
endif
()
endif
()
if
(
HAS_ROCBLAS_TUNING_BETA_FEATURE_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS
)
message
(
STATUS
"MIGraphx is using Beta API of rocBLAS"
)
else
()
message
(
STATUS
"rocBLAS does not have User Tuning Beta API"
)
endif
()
target_link_libraries
(
migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas
)
target_link_libraries
(
migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas
)
target_link_libraries
(
migraphx_gpu PRIVATE migraphx_device migraphx_kernels
)
target_link_libraries
(
migraphx_gpu PRIVATE migraphx_device migraphx_kernels
)
if
(
NOT WIN32
)
if
(
MIGRAPHX_USE_COMPOSABLEKERNEL
)
# TODO: re-enable when CK is ported to Windows
target_link_libraries
(
migraphx_gpu PRIVATE composable_kernel::jit_library
)
target_link_libraries
(
migraphx_gpu PRIVATE composable_kernel::jit_library
)
endif
()
endif
()
...
...
src/targets/gpu/compile_hip.cpp
View file @
0662a9a3
...
@@ -284,16 +284,20 @@ std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_sr
...
@@ -284,16 +284,20 @@ std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_sr
bool
is_hip_clang_compiler
()
bool
is_hip_clang_compiler
()
{
{
static
const
auto
result
=
ends_with
(
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
),
"clang++"
)
;
static
const
auto
result
=
fs
::
path
{
MIGRAPHX_HIP_COMPILER
}.
stem
()
==
"clang++"
;
return
result
;
return
result
;
}
}
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
bool
has_compiler_launcher
()
bool
has_compiler_launcher
()
{
{
static
const
auto
result
=
fs
::
exists
(
MIGRAPHX_
STRINGIZE
(
MIGRAPHX_
HIP_COMPILER_LAUNCHER
)
)
;
static
const
auto
result
=
fs
::
exists
(
MIGRAPHX_HIP_COMPILER_LAUNCHER
);
return
result
;
return
result
;
}
}
#endif
src_compiler
assemble
(
src_compiler
compiler
)
src_compiler
assemble
(
src_compiler
compiler
)
{
{
compiler
.
out_ext
=
".S"
;
compiler
.
out_ext
=
".S"
;
...
@@ -306,8 +310,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -306,8 +310,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
{
{
assert
(
not
srcs
.
empty
());
assert
(
not
srcs
.
empty
());
if
(
not
is_hip_clang_compiler
())
if
(
not
is_hip_clang_compiler
())
MIGRAPHX_THROW
(
"Unknown hip compiler: "
+
MIGRAPHX_THROW
(
"Unknown hip compiler: "
MIGRAPHX_HIP_COMPILER
);
std
::
string
(
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
)));
if
(
params
.
find
(
"-std="
)
==
std
::
string
::
npos
)
if
(
params
.
find
(
"-std="
)
==
std
::
string
::
npos
)
params
+=
" --std=c++17"
;
params
+=
" --std=c++17"
;
...
@@ -323,14 +326,14 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -323,14 +326,14 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
params
+=
" -DMIGRAPHX_DEBUG"
;
params
+=
" -DMIGRAPHX_DEBUG"
;
params
+=
" -Wno-unused-command-line-argument -Wno-cuda-compat "
;
params
+=
" -Wno-unused-command-line-argument -Wno-cuda-compat "
;
params
+=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER_FLAGS
)
;
params
+=
MIGRAPHX_HIP_COMPILER_FLAGS
;
src_compiler
compiler
;
src_compiler
compiler
;
compiler
.
flags
=
params
;
compiler
.
flags
=
params
;
compiler
.
compiler
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
)
;
compiler
.
compiler
=
MIGRAPHX_HIP_COMPILER
;
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
if
(
has_compiler_launcher
())
if
(
has_compiler_launcher
())
compiler
.
launcher
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
;
compiler
.
launcher
=
MIGRAPHX_HIP_COMPILER_LAUNCHER
;
#endif
#endif
if
(
enabled
(
MIGRAPHX_GPU_DUMP_SRC
{}))
if
(
enabled
(
MIGRAPHX_GPU_DUMP_SRC
{}))
{
{
...
@@ -354,7 +357,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -354,7 +357,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
bool
hip_has_flags
(
const
std
::
vector
<
std
::
string
>&
flags
)
bool
hip_has_flags
(
const
std
::
vector
<
std
::
string
>&
flags
)
{
{
src_compiler
compiler
;
src_compiler
compiler
;
compiler
.
compiler
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
)
;
compiler
.
compiler
=
MIGRAPHX_HIP_COMPILER
;
compiler
.
flags
=
compiler
.
flags
=
join_strings
(
flags
,
" "
)
+
" -x hip -c --offload-arch=gfx900 --cuda-device-only"
;
join_strings
(
flags
,
" "
)
+
" -x hip -c --offload-arch=gfx900 --cuda-device-only"
;
...
...
src/targets/gpu/compile_miopen.cpp
View file @
0662a9a3
...
@@ -60,9 +60,8 @@ struct miopen_op
...
@@ -60,9 +60,8 @@ struct miopen_op
};
};
MIGRAPHX_REGISTER_OP
(
miopen_op
);
MIGRAPHX_REGISTER_OP
(
miopen_op
);
std
::
size_t
compile_miopen
::
compile
(
operation
&
op
,
instruction_ref
ins
,
bool
format
)
const
std
::
size_t
compile_miopen
::
compile
(
operation
&
op
,
instruction_ref
ins
)
const
{
{
op
.
from_value
({{
"int8_x4_format"
,
format
}});
auto
v
=
op
.
compile
(
*
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
v
=
op
.
compile
(
*
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
return
v
.
get
<
std
::
size_t
>
(
"workspace"
,
0
);
return
v
.
get
<
std
::
size_t
>
(
"workspace"
,
0
);
}
}
...
@@ -70,23 +69,13 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
...
@@ -70,23 +69,13 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
void
compile_miopen
::
apply
(
module
&
m
)
const
void
compile_miopen
::
apply
(
module
&
m
)
const
{
{
assert
(
ctx
);
assert
(
ctx
);
const
bool
int8_x4_format
=
get_int8_x4_format
(
any_cast
<
migraphx
::
gpu
::
context
>
(
*
ctx
));
for
(
auto
ins
:
iterator_for
(
m
))
for
(
auto
ins
:
iterator_for
(
m
))
{
{
if
(
ins
->
name
()
!=
"gpu::miopen_op"
)
if
(
ins
->
name
()
!=
"gpu::miopen_op"
)
continue
;
continue
;
auto
op
=
any_cast
<
miopen_op
>
(
ins
->
get_operator
()).
op
;
auto
op
=
any_cast
<
miopen_op
>
(
ins
->
get_operator
()).
op
;
std
::
size_t
ws
=
0
;
std
::
size_t
ws
=
0
;
try
ws
=
compile
(
op
,
ins
);
{
// for the regular convolution and convolution_backwards, this try would always succeed
ws
=
compile
(
op
,
ins
,
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
ws
=
compile
(
op
,
ins
,
not
int8_x4_format
);
}
auto
inputs
=
ins
->
inputs
();
auto
inputs
=
ins
->
inputs
();
auto
alloc
=
m
.
insert_instruction
(
auto
alloc
=
m
.
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
shape
{
shape
::
int8_type
,
{
ws
}})}}));
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
shape
{
shape
::
int8_type
,
{
ws
}})}}));
...
...
src/targets/gpu/compile_ops.cpp
View file @
0662a9a3
...
@@ -168,6 +168,7 @@ struct compile_plan
...
@@ -168,6 +168,7 @@ struct compile_plan
}
}
const
compiled_result
&
benchmark
(
problem_cache
&
pc
)
const
const
compiled_result
&
benchmark
(
problem_cache
&
pc
)
const
{
{
const
auto
trace_level
=
value_of
(
MIGRAPHX_TRACE_BENCHMARKING
{});
if
(
results
.
empty
())
if
(
results
.
empty
())
MIGRAPHX_THROW
(
"No configs to tune"
);
MIGRAPHX_THROW
(
"No configs to tune"
);
if
(
results
.
size
()
==
1
)
if
(
results
.
size
()
==
1
)
...
@@ -178,9 +179,10 @@ struct compile_plan
...
@@ -178,9 +179,10 @@ struct compile_plan
}
}
if
(
not
config
)
if
(
not
config
)
MIGRAPHX_THROW
(
"Multiple kernels without config"
);
MIGRAPHX_THROW
(
"Multiple kernels without config"
);
if
(
trace_level
>
0
)
std
::
cout
<<
"Benchmarking "
<<
preop
.
name
()
<<
": "
<<
results
.
size
()
<<
" configs"
std
::
cout
<<
"Benchmarking "
<<
preop
.
name
()
<<
": "
<<
results
.
size
()
<<
" configs"
<<
std
::
endl
;
<<
std
::
endl
;
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
"Problem: "
<<
config
->
problem
<<
std
::
endl
;
std
::
cout
<<
"Problem: "
<<
config
->
problem
<<
std
::
endl
;
std
::
vector
<
double
>
times
;
std
::
vector
<
double
>
times
;
times
.
reserve
(
results
.
size
());
times
.
reserve
(
results
.
size
());
...
@@ -189,21 +191,22 @@ struct compile_plan
...
@@ -189,21 +191,22 @@ struct compile_plan
config
->
solutions
.
begin
(),
config
->
solutions
.
begin
(),
std
::
back_inserter
(
times
),
std
::
back_inserter
(
times
),
[
&
](
const
auto
&
cr
,
const
auto
&
solution
)
{
[
&
](
const
auto
&
cr
,
const
auto
&
solution
)
{
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
"Benchmarking solution: "
<<
solution
<<
std
::
endl
;
std
::
cout
<<
"Benchmarking solution: "
<<
solution
<<
std
::
endl
;
if
(
not
cr
.
has_value
())
if
(
not
cr
.
has_value
())
{
{
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
"No binary"
<<
std
::
endl
;
std
::
cout
<<
"No binary"
<<
std
::
endl
;
return
std
::
numeric_limits
<
double
>::
max
();
return
std
::
numeric_limits
<
double
>::
max
();
}
}
auto
t
=
time_op
(
auto
t
=
time_op
(
*
ctx
,
cr
->
replace
.
code_object
,
to_shapes
(
cr
->
ins
->
inputs
()),
20
);
*
ctx
,
cr
->
replace
.
code_object
,
to_shapes
(
cr
->
ins
->
inputs
()),
20
);
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
t
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
t
<<
"ms"
<<
std
::
endl
;
return
t
;
return
t
;
});
});
auto
i
=
std
::
distance
(
times
.
begin
(),
std
::
min_element
(
times
.
begin
(),
times
.
end
()));
auto
i
=
std
::
distance
(
times
.
begin
(),
std
::
min_element
(
times
.
begin
(),
times
.
end
()));
if
(
trace_level
>
0
)
std
::
cout
<<
"Fastest solution: "
<<
config
->
solutions
.
at
(
i
)
<<
std
::
endl
;
std
::
cout
<<
"Fastest solution: "
<<
config
->
solutions
.
at
(
i
)
<<
std
::
endl
;
pc
.
insert
(
preop
.
name
(),
config
->
problem
,
config
->
solutions
.
at
(
i
));
pc
.
insert
(
preop
.
name
(),
config
->
problem
,
config
->
solutions
.
at
(
i
));
if
(
not
results
[
i
].
has_value
())
if
(
not
results
[
i
].
has_value
())
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
deleted
100644 → 0
View file @
b74d3a8f
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
/**
 * Copies every element of `arg` into `result`, regrouping the second-to-last
 * axis in blocks of four.
 *
 * For a linear element index `ii`, with `k` the index along the second-to-last
 * axis, `m` the index along the last axis, `lda` the stride of the
 * second-to-last axis and `base` the first linear index of the matrix that
 * contains `ii`:
 *
 *     result[k % 4 + (m + (k / 4) * lda) * 4 + base] = arg[m + k * lda + base]
 *
 * @param stream HIP stream the kernel is launched on.
 * @param result Destination buffer; written through its device pointer.
 * @param arg    Source buffer; its shape drives the indexing.
 *
 * NOTE(review): the indexing presumably expects the second-to-last extent to
 * be a multiple of 4 and the input to be packed - confirm against callers.
 */
void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg)
{
    auto src_shape = arg.get_shape();
    auto dims      = src_shape.lens();

    // The last two axes form the matrix; any leading axes are batch dimensions.
    const auto k_axis = dims.size() - 2;
    const auto m_axis = dims.size() - 1;

    const std::size_t lda          = src_shape.strides()[k_axis];
    const std::size_t matrix_elems = dims[k_axis] * dims[m_axis];
    const std::size_t total_elems  = src_shape.elements();

    visit_all(result, arg)([&](auto output, auto input) {
        auto* dst = device_cast(output.data());
        auto* src = device_cast(input.data());
        visit_tensor_size(dims.size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> desc(src_shape);
            gs_launch(stream, total_elems, 256)([=](auto ii) __device__ {
                const std::size_t group = 4;

                auto multi_idx    = desc.multi(ii);
                std::size_t m_idx = multi_idx[m_axis];
                std::size_t k_idx = multi_idx[k_axis];

                // Round ii down to the start of the matrix it belongs to.
                std::size_t base = ii / matrix_elems * matrix_elems;

                std::size_t from = m_idx + k_idx * lda + base;
                std::size_t to   = k_idx % group + (m_idx + (k_idx / group) * lda) * group + base;
                dst[to]          = src[from];
            });
        });
    });
}
/**
 * Copies every element of `arg` into `result`, regrouping one matrix axis in
 * blocks of four.
 *
 * Indices are decoded from a helper shape that has the same element type as
 * `arg` but with the last two extents swapped. For a linear element index
 * `ii`, with `k` the decoded index on the second-to-last axis, `n` the decoded
 * index on the last axis, `ldb` the stride of the LAST axis of `arg`'s shape
 * and `base` the first linear index of the matrix that contains `ii`:
 *
 *     result[k % 4 + (n + (k / 4) * ldb) * 4 + base] = arg[n + k * ldb + base]
 *
 * @param stream HIP stream the kernel is launched on.
 * @param result Destination buffer; written through its device pointer.
 * @param arg    Source buffer; its shape drives the indexing.
 *
 * NOTE(review): presumably `arg` is a transposed view, which is why the last
 * axis stride is used as the leading dimension - confirm against callers.
 */
void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg)
{
    auto src_shape = arg.get_shape();
    auto dims      = src_shape.lens();

    // The last two axes form the matrix; any leading axes are batch dimensions.
    const auto k_axis = dims.size() - 2;
    const auto n_axis = dims.size() - 1;

    const std::size_t ldb          = src_shape.strides()[n_axis];
    const std::size_t matrix_elems = dims[k_axis] * dims[n_axis];

    // Shape used only for index decoding: same type, last two extents swapped.
    auto swapped_dims = dims;
    std::swap(swapped_dims[k_axis], swapped_dims[n_axis]);
    shape index_shape{src_shape.type(), swapped_dims};

    const std::size_t total_elems = index_shape.elements();

    visit_all(result, arg)([&](auto output, auto input) {
        auto* dst = device_cast(output.data());
        auto* src = device_cast(input.data());
        visit_tensor_size(dims.size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> desc(index_shape);
            gs_launch(stream, total_elems, 256)([=](auto ii) __device__ {
                const std::size_t group = 4;

                auto multi_idx    = desc.multi(ii);
                std::size_t n_idx = multi_idx[n_axis];
                std::size_t k_idx = multi_idx[k_axis];

                // Round ii down to the start of the matrix it belongs to.
                std::size_t base = ii / matrix_elems * matrix_elems;

                std::size_t from = n_idx + k_idx * ldb + base;
                std::size_t to   = k_idx % group + (n_idx + (k_idx / group) * ldb) * group + base;
                dst[to]          = src[from];
            });
        });
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/gemm_impl.cpp
View file @
0662a9a3
This diff is collapsed.
Click to expand it.
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
View file @
0662a9a3
...
@@ -42,7 +42,7 @@ struct compile_miopen
...
@@ -42,7 +42,7 @@ struct compile_miopen
context
*
ctx
=
nullptr
;
context
*
ctx
=
nullptr
;
std
::
string
name
()
const
{
return
"gpu::compile_miopen"
;
}
std
::
string
name
()
const
{
return
"gpu::compile_miopen"
;
}
void
apply
(
module
&
m
)
const
;
void
apply
(
module
&
m
)
const
;
std
::
size_t
compile
(
operation
&
op
,
instruction_ref
ins
,
bool
format
)
const
;
std
::
size_t
compile
(
operation
&
op
,
instruction_ref
ins
)
const
;
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
0662a9a3
...
@@ -57,7 +57,6 @@ template <class Op>
...
@@ -57,7 +57,6 @@ template <class Op>
struct
miopen_convolution
struct
miopen_convolution
{
{
Op
op
;
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
...
@@ -74,7 +73,6 @@ struct miopen_convolution
...
@@ -74,7 +73,6 @@ struct miopen_convolution
f
(
self
.
solution_object
,
"solution_object"
),
f
(
self
.
solution_object
,
"solution_object"
),
#endif
#endif
f
(
self
.
algo
,
"algo"
),
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
f
(
self
.
solution_id
,
"solution_id"
));
}
}
...
@@ -94,8 +92,8 @@ struct miopen_convolution
...
@@ -94,8 +92,8 @@ struct miopen_convolution
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
{
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
())
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
())
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
...
@@ -162,8 +160,8 @@ struct miopen_convolution
...
@@ -162,8 +160,8 @@ struct miopen_convolution
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
{
{
shape
workspace_shape
{};
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
])
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
])
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
...
@@ -179,13 +177,8 @@ struct miopen_convolution
...
@@ -179,13 +177,8 @@ struct miopen_convolution
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
const
auto
&
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
const
auto
&
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
{
{
...
@@ -327,8 +320,8 @@ struct miopen_convolution
...
@@ -327,8 +320,8 @@ struct miopen_convolution
": workspace has changed during finalization."
);
": workspace has changed during finalization."
);
}
}
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
])
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
])
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
...
@@ -347,21 +340,6 @@ struct miopen_convolution
...
@@ -347,21 +340,6 @@ struct miopen_convolution
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
/// Returns the shape used for the packed int8 (x4) layout.
/// Shapes whose type is not int8 are returned unchanged. For int8 shapes,
/// the extent of axis 1 is rounded up to the next multiple of 4 and the
/// stride of axis 0 is recomputed as stride[1] * (rounded extent of axis 1).
/// NOTE(review): indexes axes 0 and 1 unconditionally, so this presumably
/// expects shapes with at least two dimensions - confirm against callers.
inline shape pack_int8_shape(const shape& s) const
{
    const bool is_int8 = (s.type() == shape::int8_type);
    if(not is_int8)
        return s;

    auto padded_lens    = s.lens();
    auto padded_strides = s.strides();

    // Round the extent of axis 1 up to a multiple of 4.
    padded_lens[1] = (padded_lens[1] + 3) / 4 * 4;
    // Axis 0 must step over the padded extent of axis 1.
    padded_strides[0] = padded_strides[1] * padded_lens[1];

    return {s.type(), padded_lens, padded_strides};
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
deleted
100644 → 0
View file @
b74d3a8f
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
/// Device routine that copies `arg` into `result` using the int8 "pack A"
/// element ordering (see the definition in int8_gemm_pack.cpp).
/// @param stream HIP stream the work is launched on.
/// @param result Destination argument that receives the packed data.
/// @param arg    Source argument to be packed.
void
MIGRAPHX_DEVICE_EXPORT
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
/// Device routine that copies `arg` into `result` using the int8 "pack B"
/// element ordering (see the definition in int8_gemm_pack.cpp).
/// @param stream HIP stream the work is launched on.
/// @param result Destination argument that receives the packed data.
/// @param arg    Source argument to be packed.
void
MIGRAPHX_DEVICE_EXPORT
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/gemm.hpp
View file @
0662a9a3
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -40,9 +40,8 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -40,9 +40,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
struct
context
;
struct
context
;
void
blas_shape
(
const
shape
&
s
);
shape
transpose_batch
(
const
shape
&
s
,
unsigned
trans_batch
);
shape
transpose_batch
(
const
shape
&
s
,
unsigned
trans_batch
);
void
blas_shape
(
const
shape
&
s
);
template
<
class
Op
>
template
<
class
Op
>
struct
rocblas_gemm
struct
rocblas_gemm
...
@@ -50,9 +49,9 @@ struct rocblas_gemm
...
@@ -50,9 +49,9 @@ struct rocblas_gemm
Op
op
;
Op
op
;
float
alpha
=
1
;
float
alpha
=
1
;
float
beta
=
0
;
float
beta
=
0
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
bool
compute_fp32
=
false
;
unsigned
trans_batch
=
0
;
unsigned
trans_batch
=
0
;
int32_t
solution_idx
=
0
;
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
...
@@ -60,9 +59,9 @@ struct rocblas_gemm
...
@@ -60,9 +59,9 @@ struct rocblas_gemm
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
pack
(
f
(
self
.
alpha
,
"alpha"
),
pack
(
f
(
self
.
alpha
,
"alpha"
),
f
(
self
.
beta
,
"beta"
),
f
(
self
.
beta
,
"beta"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
compute_fp32
,
"compute_fp32"
),
f
(
self
.
compute_fp32
,
"compute_fp32"
),
f
(
self
.
trans_batch
,
"trans_batch"
)));
f
(
self
.
trans_batch
,
"trans_batch"
),
f
(
self
.
solution_idx
,
"solution_idx"
)));
}
}
std
::
string
name
()
const
std
::
string
name
()
const
...
@@ -78,6 +77,8 @@ struct rocblas_gemm
...
@@ -78,6 +77,8 @@ struct rocblas_gemm
{
{
std
::
vector
<
shape
>
in_shapes
(
inputs
);
std
::
vector
<
shape
>
in_shapes
(
inputs
);
in_shapes
.
pop_back
();
in_shapes
.
pop_back
();
// When input shapes are A, B, C the GEMM equation is C = α AB + β C, where α and β are
// scalars.
check_shapes
{
in_shapes
,
*
this
}.
has
(
2
,
3
);
check_shapes
{
in_shapes
,
*
this
}.
has
(
2
,
3
);
blas_shape
(
inputs
[
0
]);
blas_shape
(
inputs
[
0
]);
blas_shape
(
inputs
[
1
]);
blas_shape
(
inputs
[
1
]);
...
@@ -113,17 +114,12 @@ struct rocblas_gemm
...
@@ -113,17 +114,12 @@ struct rocblas_gemm
{
{
if
(
this
->
name
()
==
"gpu::gemm"
)
if
(
this
->
name
()
==
"gpu::gemm"
)
{
{
gemm
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
);
gemm
_compute
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
compute_fp32
,
solution_idx
);
}
}
else
else
{
{
gemm
(
ctx
,
gemm_compute
(
output_shape
,
ctx
,
output_shape
,
args
,
int32_t
(
alpha
),
int32_t
(
beta
),
compute_fp32
,
solution_idx
);
args
,
int32_t
(
alpha
),
int32_t
(
beta
),
int8_x4_format
,
compute_fp32
);
}
}
return
args
.
back
();
return
args
.
back
();
}
}
...
@@ -132,6 +128,33 @@ struct rocblas_gemm
...
@@ -132,6 +128,33 @@ struct rocblas_gemm
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
/// Finalization hook for the GEMM operator.
///
/// When built with MIGRAPHX_USE_ROCBLAS_TUNING_API and tuning is requested
/// (via MIGRAPHX_ENABLE_GEMM_TUNING or the context's exhaustive-tune flag),
/// calls gemm_finalize and stores its result in `solution_idx`. For operators
/// not named "gpu::gemm", alpha and beta are converted to int32_t first.
/// Without the tuning API this is a no-op.
///
/// @param ctx          GPU context; queried for the exhaustive-tune flag.
/// @param output_shape Shape of the GEMM result.
/// @param input_shapes Shapes of the operator inputs.
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& input_shapes)
{
#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API
    // Nothing to do unless tuning was requested by environment or by the context.
    if(not enabled(MIGRAPHX_ENABLE_GEMM_TUNING{}) and not ctx.get_exhaustive_tune_flag())
        return;

    const bool is_float_gemm = (this->name() == "gpu::gemm");
    if(is_float_gemm)
    {
        solution_idx = gemm_finalize(
            ctx, output_shape, input_shapes, alpha, beta, compute_fp32, solution_idx);
    }
    else
    {
        // Other variants pass the scaling factors as 32-bit integers.
        solution_idx = gemm_finalize(ctx,
                                     output_shape,
                                     input_shapes,
                                     static_cast<int32_t>(alpha),
                                     static_cast<int32_t>(beta),
                                     compute_fp32,
                                     solution_idx);
    }
#else
    // Parameters are unused without the tuning API; silence compiler warnings.
    (void)ctx;
    (void)output_shape;
    (void)input_shapes;
#endif
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment