Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
0662a9a3
Unverified
Commit
0662a9a3
authored
Nov 09, 2023
by
Brian Pickrell
Committed by
GitHub
Nov 09, 2023
Browse files
Merge branch 'develop' into dyn_resize_gather
parents
b74d3a8f
35e5298e
Changes
130
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
804 additions
and
439 deletions
+804
-439
src/onnx/parse_multinomial.cpp
src/onnx/parse_multinomial.cpp
+74
-16
src/onnx/parse_resize.cpp
src/onnx/parse_resize.cpp
+71
-3
src/onnx/parse_slice.cpp
src/onnx/parse_slice.cpp
+7
-5
src/onnx/parse_split.cpp
src/onnx/parse_split.cpp
+26
-5
src/program.cpp
src/program.cpp
+1
-1
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+5
-2
src/quantization.cpp
src/quantization.cpp
+1
-1
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+1
-1
src/simplify_dyn_ops.cpp
src/simplify_dyn_ops.cpp
+46
-2
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+2
-2
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+24
-22
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+11
-8
src/targets/gpu/compile_miopen.cpp
src/targets/gpu/compile_miopen.cpp
+4
-15
src/targets/gpu/compile_ops.cpp
src/targets/gpu/compile_ops.cpp
+10
-7
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+0
-97
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+474
-157
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+9
-31
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
...argets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+0
-49
src/targets/gpu/include/migraphx/gpu/gemm.hpp
src/targets/gpu/include/migraphx/gpu/gemm.hpp
+37
-14
No files found.
src/onnx/parse_multinomial.cpp
View file @
0662a9a3
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
{
if
(
args
.
empty
())
MIGRAPHX_THROW
(
"PARSE_MULTINOMIAL: no arguments given"
);
int
dtype
=
6
;
if
(
contains
(
info
.
attributes
,
"dtype"
))
dtype
=
info
.
attributes
.
at
(
"dtype"
).
i
();
...
...
@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
size_t
sample_size
=
1
;
if
(
contains
(
info
.
attributes
,
"sample_size"
))
sample_size
=
info
.
attributes
.
at
(
"sample_size"
).
i
();
else
MIGRAPHX_THROW
(
"PARSE_MULTINOMIAL: sample_size not given"
);
// Use logarithmic math to scale probabilities while avoiding division by very
// small numbers. Scaling by the maximum makes very tiny ranges more
// tractable; any constant factor gives equivalent distr. since the Multinomial op.
// normalizes at runtime.
// Subtract the per-batch maximum log-probability, making the per-batch max 0
auto
maxes
=
info
.
add_instruction
(
migraphx
::
make_op
(
"reduce_max"
,
{{
"axes"
,
{
1
}}}),
args
[
0
]);
auto
mb_maxes
=
info
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
args
[
0
]
->
get_shape
().
lens
()}}),
maxes
);
auto
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"sub"
),
args
[
0
],
mb_maxes
);
auto
cdf
=
info
.
add_common_op
(
"sub"
,
args
[
0
],
maxes
);
// Take the element-wise exponent to get probabilities in the range (0, 1]
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"exp"
),
cdf
);
// Compute the cumulative d
ensity
function
// Compute the cumulative d
istribution
function
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"prefix_scan_sum"
,
{{
"axis"
,
1
},
{
"exclusive"
,
false
}}),
cdf
);
// Pre-compute random distribution
std
::
mt19937
gen
(
std
::
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
().
count
());
instruction_ref
seed_input
;
if
(
contains
(
info
.
attributes
,
"seed"
))
gen
.
seed
(
info
.
attributes
.
at
(
"seed"
).
f
());
{
float
seed
=
info
.
attributes
.
at
(
"seed"
).
f
();
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
1
}};
std
::
vector
<
float
>
data
=
{
seed
};
seed_input
=
info
.
add_literal
(
migraphx
::
literal
(
s
,
data
));
}
else
{
seed_input
=
info
.
add_instruction
(
migraphx
::
make_op
(
"random_seed"
));
}
instruction_ref
randoms
;
shape
s0
=
args
[
0
]
->
get_shape
();
if
(
s0
.
dynamic
())
{
// Dynamic batch_size will be taken from args[0]. The input argument to this should
// have a second dimension of sample_size.
std
::
vector
<
shape
::
dynamic_dimension
>
dyn_dim_set
;
dyn_dim_set
.
emplace_back
(
s0
.
dyn_dims
().
front
());
dyn_dim_set
.
emplace_back
(
shape
::
dynamic_dimension
{
sample_size
,
sample_size
});
// read the input dimensions
auto
dim_of
=
info
.
add_instruction
(
migraphx
::
make_op
(
"dimensions_of"
,
{{
"end"
,
2
}}),
args
[
0
]);
// The next two operations insert the value sample_size into the second array position
// make an argument of (1, 0)
shape
s
(
shape
::
int64_type
,
{
2
});
std
::
vector
<
int64_t
>
data1
{
1
,
0
};
auto
l1
=
info
.
add_literal
(
s
,
data1
);
auto
batch_arg
=
info
.
add_instruction
(
migraphx
::
make_op
(
"mul"
),
dim_of
,
l1
);
std
::
vector
<
int64_t
>
data2
(
2
,
0
);
// make an argument of (0, sample_size)
data2
[
1
]
=
sample_size
;
auto
l2
=
info
.
add_literal
(
s
,
data2
);
auto
alloc_shape
=
info
.
add_instruction
(
migraphx
::
make_op
(
"add"
),
batch_arg
,
l2
);
// alloc_shape should contain the input-based shape dimensions as its values at runtime,
// and its own shape is {2}
// compile_shape is the shape used when compiling the Allocate op, and may be dynamic
migraphx
::
shape
compile_shape
=
migraphx
::
shape
(
s0
.
type
(),
{
s0
.
dyn_dims
().
front
(),
{
sample_size
,
sample_size
}});
std
::
uniform_real_distribution
<>
dis
(
0.0
,
1.0
);
size_t
batch_size
=
args
[
0
]
->
get_shape
().
lens
().
front
();
migraphx
::
shape
dist_shape
{
migraphx
::
shape
::
float_type
,
{
batch_size
,
sample_size
}};
// Allocate on-device storage for the random values
auto
alloc
=
info
.
add_instruction
(
migraphx
::
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
compile_shape
)}}),
alloc_shape
);
randoms
=
info
.
add_instruction
(
migraphx
::
make_op
(
"random_uniform"
),
seed_input
,
alloc
);
}
else
{
// use literal. The array populated by random_uniform may have any shape, as long its
// number of elements is batch_size * sample_size .
size_t
batch_size
=
s0
.
lens
().
front
();
auto
rand_dummy
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
::
float_type
,
{
batch_size
*
sample_size
}});
std
::
vector
<
float
>
random_dist
(
batch_size
*
sample_size
);
std
::
generate
(
random_dist
.
begin
(),
random_dist
.
end
(),
[
&
]()
{
return
dis
(
gen
);
}
);
auto
dist_lit
=
info
.
add_literal
(
migraphx
::
literal
{
dist_shape
,
random_dist
});
randoms
=
info
.
add_instruction
(
migraphx
::
make_op
(
"random_uniform"
),
seed_input
,
rand_dummy
);
}
return
info
.
add_instruction
(
migraphx
::
make_op
(
"multinomial"
,
{{
"dtype"
,
output_type
}}),
cdf
,
dist_lit
);
migraphx
::
make_op
(
"multinomial"
,
{{
"dtype"
,
output_type
}}),
cdf
,
randoms
);
}
};
...
...
src/onnx/parse_resize.cpp
View file @
0662a9a3
...
...
@@ -181,6 +181,76 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
return
nearest_mode
;
}
static
std
::
vector
<
double
>
get_scales
(
const
onnx_parser
::
attribute_map
&
attr
)
{
std
::
vector
<
double
>
scales
;
if
(
contains
(
attr
,
"scales"
))
{
copy
(
attr
.
at
(
"scales"
).
floats
(),
std
::
back_inserter
(
scales
));
}
return
scales
;
}
static
void
parse_args
(
const
std
::
vector
<
instruction_ref
>&
args
,
const
std
::
vector
<
size_t
>&
in_lens
,
const
std
::
string
&
op_name
,
std
::
vector
<
double
>&
vec_scale
,
std
::
vector
<
std
::
size_t
>&
out_lens
)
{
for
(
const
auto
&
arg
:
args
)
{
if
(
arg
->
name
()
==
"undefined"
or
arg
==
args
.
front
())
{
continue
;
}
// skipped empty input
auto
lens
=
arg
->
get_shape
().
lens
();
if
(
lens
.
empty
())
{
continue
;
}
auto
type
=
arg
->
get_shape
().
type
();
// output size
if
(
type
==
shape
::
int64_type
)
{
auto
arg_out_s
=
arg
->
eval
();
check_arg_empty
(
arg_out_s
,
"PARSE_"
+
op_name
+
": dynamic output size is not supported!"
);
arg_out_s
.
visit
([
&
](
const
auto
&
ol
)
{
out_lens
.
assign
(
ol
.
begin
(),
ol
.
end
());
});
if
(
out_lens
.
size
()
!=
in_lens
.
size
())
{
MIGRAPHX_THROW
(
"PARSE_"
+
op_name
+
": specified output size does not match input size"
);
}
// compute the scale
vec_scale
.
resize
(
in_lens
.
size
());
std
::
transform
(
in_lens
.
begin
(),
in_lens
.
end
(),
out_lens
.
begin
(),
vec_scale
.
begin
(),
[](
auto
iss
,
auto
oss
)
{
return
1.0
*
oss
/
iss
;
});
}
else
{
// scale input
if
(
lens
[
0
]
==
in_lens
.
size
())
{
auto
arg_scale
=
arg
->
eval
();
check_arg_empty
(
arg_scale
,
"PARSE_"
+
op_name
+
": dynamic input scale is not supported!"
);
arg_scale
.
visit
([
&
](
const
auto
&
v
)
{
vec_scale
.
assign
(
v
.
begin
(),
v
.
end
());
});
}
}
}
}
struct
parse_resize
:
op_parser
<
parse_resize
>
{
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"Resize"
},
{
"Upsample"
}};
}
...
...
@@ -309,7 +379,7 @@ struct parse_resize : op_parser<parse_resize>
std
::
vector
<
size_t
>
out_lens
(
in_s
.
ndim
());
// scale
std
::
vector
<
double
>
vec_scale
;
std
::
vector
<
double
>
vec_scale
=
get_scales
(
info
.
attributes
)
;
// Look at inputs and infer either output size or scale, depending on input type
for
(
const
auto
&
arg
:
args
)
...
...
@@ -381,8 +451,6 @@ struct parse_resize : op_parser<parse_resize>
}
}
// Dynamic batch: Only args[0] can have a dynamic shape, only the 0'th
// dimension--batch size--can be non-fixed, and the only resize mode allowed is "nearest"
if
(
args
[
0
]
->
get_shape
().
dynamic
())
{
return
dynamic_nearest_parse
(
out_lens
,
vec_scale
,
opd
,
info
,
args
);
...
...
src/onnx/parse_slice.cpp
View file @
0662a9a3
...
...
@@ -46,6 +46,9 @@ struct parse_slice : op_parser<parse_slice>
void
always_insert
(
instruction_ref
arg
)
{
op_args
.
insert
(
op_args
.
begin
(),
arg
);
}
/**
* Either insert argument into `this->op_args` or return the constant value of the argument
*/
std
::
vector
<
int64_t
>
insert
(
instruction_ref
arg
)
{
std
::
vector
<
int64_t
>
result
;
...
...
@@ -137,23 +140,22 @@ struct parse_slice : op_parser<parse_slice>
sd
.
always_insert
(
args
.
at
(
0
));
// If axes arg is not given, the default is all of them.
if
(
sd
.
op
.
axes
.
empty
()
and
sd
.
op_args
.
size
()
<
3
)
if
(
sd
.
op
.
axes
.
empty
()
and
sd
.
op_args
.
size
()
<
=
3
)
{
std
::
vector
<
int64_t
>
axes
(
args
[
0
]
->
get_shape
().
ndim
());
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
int64_t
{
0
});
sd
.
op
.
axes
=
axes
;
}
if
(
not
sd
.
steps
.
empty
(
))
if
(
std
::
any_of
(
sd
.
steps
.
begin
(),
sd
.
steps
.
end
(),
[](
auto
s
)
{
return
s
!=
1
;
}
))
{
if
(
sd
.
op
.
starts
.
empty
()
or
sd
.
op
.
ends
.
empty
())
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable starts and ends is not supported"
);
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable starts and/or ends is not supported"
);
if
(
sd
.
op
.
axes
.
empty
())
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable axes is not supported"
);
}
assert
(
sd
.
steps
.
empty
()
or
sd
.
steps
.
size
()
==
sd
.
op
.
axes
.
size
());
// If any axes have negative step, prepare to add a "reverse" op
for
(
auto
i
:
range
(
sd
.
steps
.
size
()))
{
...
...
src/onnx/parse_split.cpp
View file @
0662a9a3
...
...
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
// no split attribute, input is equally divided
else
{
if
((
lens
[
tuned_axis
]
%
info
.
num_outputs
)
!=
0
)
std
::
size_t
num_outputs
=
info
.
num_outputs
;
// the num_outputs attribute seems to be redundant since we already have
// node_info::num_outputs, but we can still perform an error check
if
(
contains
(
info
.
attributes
,
"num_outputs"
))
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: input cannot be equally divided into "
+
std
::
to_string
(
info
.
num_outputs
)
+
" splits!"
);
num_outputs
=
parser
.
parse_value
(
info
.
attributes
.
at
(
"num_outputs"
)).
at
<
std
::
size_t
>
();
if
(
num_outputs
!=
info
.
num_outputs
)
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: num_outputs attribute "
+
std
::
to_string
(
num_outputs
)
+
" doesn't match actual number of outputs "
+
std
::
to_string
(
info
.
num_outputs
)
+
"!"
);
}
}
if
(
lens
[
tuned_axis
]
%
num_outputs
==
0
)
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
;
vec_splits
.
resize
(
num_outputs
,
chunk_size
);
}
else
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
+
1
;
std
::
size_t
last_chunk_size
=
lens
[
tuned_axis
]
-
chunk_size
*
(
num_outputs
-
1
);
vec_splits
.
resize
(
num_outputs
-
1
,
chunk_size
);
vec_splits
.
push_back
(
last_chunk_size
);
}
auto
dl
=
lens
[
tuned_axis
]
/
info
.
num_outputs
;
vec_splits
.
resize
(
info
.
num_outputs
,
dl
);
}
if
(
std
::
accumulate
(
vec_splits
.
begin
(),
vec_splits
.
end
(),
int64_t
(
0
))
!=
...
...
src/program.cpp
View file @
0662a9a3
...
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
os
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"inferences/sec"
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"
inferences/sec"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
...
...
src/py/migraphx_py.cpp
View file @
0662a9a3
...
...
@@ -472,7 +472,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
map_dyn_input_dims
,
bool
skip_unknown_operators
,
bool
print_program_on_error
,
int64_t
max_loop_iterations
)
{
int64_t
max_loop_iterations
,
int64_t
limit_max_iterations
)
{
migraphx
::
onnx_options
options
;
options
.
default_dim_value
=
default_dim_value
;
options
.
default_dyn_dim_value
=
default_dyn_dim_value
;
...
...
@@ -481,6 +482,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
options
.
skip_unknown_operators
=
skip_unknown_operators
;
options
.
print_program_on_error
=
print_program_on_error
;
options
.
max_loop_iterations
=
max_loop_iterations
;
options
.
limit_max_iterations
=
limit_max_iterations
;
return
migraphx
::
parse_onnx
(
filename
,
options
);
},
"Parse onnx file"
,
...
...
@@ -492,7 +494,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
migraphx
::
shape
::
dynamic_dimension
>>
(),
py
::
arg
(
"skip_unknown_operators"
)
=
false
,
py
::
arg
(
"print_program_on_error"
)
=
false
,
py
::
arg
(
"max_loop_iterations"
)
=
10
);
py
::
arg
(
"max_loop_iterations"
)
=
10
,
py
::
arg
(
"limit_max_iterations"
)
=
std
::
numeric_limits
<
uint16_t
>::
max
());
m
.
def
(
"parse_onnx_buffer"
,
...
...
src/quantization.cpp
View file @
0662a9a3
...
...
@@ -147,8 +147,8 @@ void quantize_int8(program& prog,
run_passes
(
prog
,
{
quantize_int8_pass
{
ins_names
,
*
int8_quant_params
},
optimize_module
{},
simplify_qdq
{},
optimize_module
{},
dead_code_elimination
{}});
}
...
...
src/rewrite_quantization.cpp
View file @
0662a9a3
...
...
@@ -47,7 +47,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
x
);
}
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
round
"
),
div
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
nearbyint
"
),
div
);
if
(
ins
->
inputs
().
size
()
==
3
)
{
...
...
src/simplify_dyn_ops.cpp
View file @
0662a9a3
...
...
@@ -24,6 +24,7 @@
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/literal.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -131,10 +132,53 @@ struct find_const_4in_slice
}
};
/**
* Simplify dimensions_of to a literal when the input arugment has a static shape
* or the dynamic dimensions from `start` to `end` are fixed.
*/
struct
find_static_dimensions_of
{
auto
matcher
()
const
{
return
match
::
name
(
"dimensions_of"
)();
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
input
=
ins
->
inputs
().
at
(
0
);
auto
dimensions_of_value
=
ins
->
get_operator
().
to_value
();
auto
start
=
dimensions_of_value
.
at
(
"start"
).
to
<
std
::
size_t
>
();
auto
end
=
dimensions_of_value
.
at
(
"end"
).
to
<
std
::
size_t
>
();
if
(
input
->
get_shape
().
dynamic
())
{
// check if dynamic dimensions from start to end are fixed
auto
dds
=
input
->
get_shape
().
dyn_dims
();
if
(
std
::
any_of
(
dds
.
begin
()
+
start
,
dds
.
begin
()
+
end
,
[](
auto
dd
)
{
return
not
dd
.
is_fixed
();
}))
{
return
;
}
}
std
::
size_t
output_ndim
=
end
-
start
;
std
::
vector
<
int64_t
>
vec_shape
(
output_ndim
);
migraphx
::
shape
s
(
migraphx
::
shape
::
int64_type
,
{
output_ndim
});
std
::
vector
<
std
::
size_t
>
input_lens
=
input
->
get_shape
().
to_static
(
1
).
lens
();
std
::
transform
(
input_lens
.
begin
()
+
start
,
input_lens
.
begin
()
+
end
,
vec_shape
.
begin
(),
[](
auto
i
)
{
return
int64_t
(
i
);
});
migraphx
::
shape
output_shape
{
migraphx
::
shape
::
int64_type
,
{
end
-
start
}};
auto
lit_ins
=
m
.
add_literal
(
migraphx
::
literal
{
output_shape
,
vec_shape
});
m
.
replace_instruction
(
ins
,
lit_ins
);
}
};
void
simplify_dyn_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_static_2in_broadcasts
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
match
::
find_matches
(
m
,
find_static_2in_broadcasts
{},
find_static_dimensions_of
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
}
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/simplify_reshapes.cpp
View file @
0662a9a3
...
...
@@ -647,8 +647,8 @@ struct find_broadcast_transpose
{
auto
transpose
=
r
.
result
;
auto
transpose_lens
=
transpose
->
get_shape
().
lens
();
auto
bcast_ins
=
r
.
instructions
[
"bcast_ins"
];
auto
input
=
bcast_ins
->
inputs
().
front
();
auto
bcast_ins
=
r
.
instructions
[
"bcast_ins"
];
auto
input
=
bcast_ins
->
inputs
().
front
();
// scalar transformation does not need extra transpose
if
(
not
input
->
get_shape
().
scalar
())
{
...
...
src/targets/gpu/CMakeLists.txt
View file @
0662a9a3
# ####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
...
...
@@ -37,8 +37,7 @@ if(NOT TARGET MIOpen)
message
(
SEND_ERROR
"Cant find miopen"
)
endif
()
if
(
NOT WIN32
)
# TODO: re-enable when CK is ported to Windows
if
(
MIGRAPHX_USE_COMPOSABLEKERNEL
)
find_package
(
composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library
)
endif
()
...
...
@@ -52,10 +51,10 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/*.hpp
)
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
if
(
WIN32
)
# TODO: re-enable when CK is ported to Windows
if
(
NOT MIGRAPHX_USE_COMPOSABLEKERNEL
)
list
(
REMOVE_ITEM KERNEL_FILES
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck_gemm.hpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/ck.hpp
)
endif
()
...
...
@@ -103,9 +102,10 @@ rocm_clang_tidy_check(kernel_file_check)
file
(
GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/*.cpp
)
if
(
WIN32
)
# TODO: re-enable when CK is ported to Windows
list
(
REMOVE_ITEM JIT_GPU_SRCS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm.cpp
)
if
(
NOT MIGRAPHX_USE_COMPOSABLEKERNEL
)
list
(
REMOVE_ITEM JIT_GPU_SRCS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm_softmax_gemm.cpp
)
endif
()
add_library
(
migraphx_gpu
...
...
@@ -128,8 +128,6 @@ add_library(migraphx_gpu
gather.cpp
gemm_impl.cpp
hip.cpp
int8_conv_pack.cpp
int8_gemm_pack.cpp
kernel.cpp
lowering.cpp
logsoftmax.cpp
...
...
@@ -140,7 +138,6 @@ add_library(migraphx_gpu
no_device.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
perfdb.cpp
...
...
@@ -184,7 +181,6 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops
(
miopen_
abs
contiguous
int8_conv_pack
lrn
pooling
)
...
...
@@ -192,10 +188,6 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
...
...
@@ -239,24 +231,28 @@ else()
string
(
REGEX REPLACE
" /[^ ]+
\\
.(a|so) "
" "
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
endforeach
()
message
(
STATUS
"Hip compiler flags:
${
HIP_COMPILER_FLAGS
}
"
)
message
(
STATUS
"Hip compiler flags:
\"
${
HIP_COMPILER_FLAGS
}
\"
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"
-DMIGRAPHX_HIP_COMPILER=
${
CMAKE_CXX_COMPILER
}
"
"
-DMIGRAPHX_HIP_COMPILER_FLAGS=
${
HIP_COMPILER_FLAGS
}
"
-DMIGRAPHX_HIP_COMPILER=
"
${
CMAKE_CXX_COMPILER
}
"
-DMIGRAPHX_HIP_COMPILER_FLAGS=
"
${
HIP_COMPILER_FLAGS
}
"
)
if
(
DEFINED CMAKE_CXX_COMPILER_LAUNCHER
)
execute_process
(
COMMAND which
${
CMAKE_CXX_COMPILER_LAUNCHER
}
OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER
)
string
(
STRIP
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"
-DMIGRAPHX_HIP_COMPILER_LAUNCHER=
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE -DMIGRAPHX_HIP_COMPILER_LAUNCHER=
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
)
endif
()
endif
()
# Check miopen find mode api
include
(
CheckLibraryExists
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
get_target_property
(
ROCBLAS_LOCATION roc::rocblas LOCATION
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
# Beta API for automated GEMM tuning
check_library_exists
(
roc::rocblas
"rocblas_gemm_ex_get_solutions"
"
${
ROCBLAS_LOCATION
}
"
HAS_ROCBLAS_TUNING_BETA_FEATURE_API
)
set
(
MIGRAPHX_USE_FIND_2_API
"
${
HAS_FIND_2_API
}
"
CACHE BOOL
""
)
...
...
@@ -279,10 +275,16 @@ else()
message
(
STATUS
"MIOpen does not have find mode api"
)
endif
()
if
(
HAS_ROCBLAS_TUNING_BETA_FEATURE_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS
)
message
(
STATUS
"MIGraphx is using Beta API of rocBLAS"
)
else
()
message
(
STATUS
"rocBLAS does not have User Tuning Beta API"
)
endif
()
target_link_libraries
(
migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas
)
target_link_libraries
(
migraphx_gpu PRIVATE migraphx_device migraphx_kernels
)
if
(
NOT WIN32
)
# TODO: re-enable when CK is ported to Windows
if
(
MIGRAPHX_USE_COMPOSABLEKERNEL
)
target_link_libraries
(
migraphx_gpu PRIVATE composable_kernel::jit_library
)
endif
()
...
...
src/targets/gpu/compile_hip.cpp
View file @
0662a9a3
...
...
@@ -284,16 +284,20 @@ std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_sr
bool
is_hip_clang_compiler
()
{
static
const
auto
result
=
ends_with
(
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
),
"clang++"
)
;
static
const
auto
result
=
fs
::
path
{
MIGRAPHX_HIP_COMPILER
}.
stem
()
==
"clang++"
;
return
result
;
}
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
bool
has_compiler_launcher
()
{
static
const
auto
result
=
fs
::
exists
(
MIGRAPHX_
STRINGIZE
(
MIGRAPHX_
HIP_COMPILER_LAUNCHER
)
)
;
static
const
auto
result
=
fs
::
exists
(
MIGRAPHX_HIP_COMPILER_LAUNCHER
);
return
result
;
}
#endif
src_compiler
assemble
(
src_compiler
compiler
)
{
compiler
.
out_ext
=
".S"
;
...
...
@@ -306,8 +310,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
{
assert
(
not
srcs
.
empty
());
if
(
not
is_hip_clang_compiler
())
MIGRAPHX_THROW
(
"Unknown hip compiler: "
+
std
::
string
(
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
)));
MIGRAPHX_THROW
(
"Unknown hip compiler: "
MIGRAPHX_HIP_COMPILER
);
if
(
params
.
find
(
"-std="
)
==
std
::
string
::
npos
)
params
+=
" --std=c++17"
;
...
...
@@ -323,14 +326,14 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
params
+=
" -DMIGRAPHX_DEBUG"
;
params
+=
" -Wno-unused-command-line-argument -Wno-cuda-compat "
;
params
+=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER_FLAGS
)
;
params
+=
MIGRAPHX_HIP_COMPILER_FLAGS
;
src_compiler
compiler
;
compiler
.
flags
=
params
;
compiler
.
compiler
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
)
;
compiler
.
compiler
=
MIGRAPHX_HIP_COMPILER
;
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
if
(
has_compiler_launcher
())
compiler
.
launcher
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
;
compiler
.
launcher
=
MIGRAPHX_HIP_COMPILER_LAUNCHER
;
#endif
if
(
enabled
(
MIGRAPHX_GPU_DUMP_SRC
{}))
{
...
...
@@ -354,7 +357,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
bool
hip_has_flags
(
const
std
::
vector
<
std
::
string
>&
flags
)
{
src_compiler
compiler
;
compiler
.
compiler
=
MIGRAPHX_STRINGIZE
(
MIGRAPHX_HIP_COMPILER
)
;
compiler
.
compiler
=
MIGRAPHX_HIP_COMPILER
;
compiler
.
flags
=
join_strings
(
flags
,
" "
)
+
" -x hip -c --offload-arch=gfx900 --cuda-device-only"
;
...
...
src/targets/gpu/compile_miopen.cpp
View file @
0662a9a3
...
...
@@ -60,9 +60,8 @@ struct miopen_op
};
MIGRAPHX_REGISTER_OP
(
miopen_op
);
std
::
size_t
compile_miopen
::
compile
(
operation
&
op
,
instruction_ref
ins
,
bool
format
)
const
std
::
size_t
compile_miopen
::
compile
(
operation
&
op
,
instruction_ref
ins
)
const
{
op
.
from_value
({{
"int8_x4_format"
,
format
}});
auto
v
=
op
.
compile
(
*
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
return
v
.
get
<
std
::
size_t
>
(
"workspace"
,
0
);
}
...
...
@@ -70,25 +69,15 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
void
compile_miopen
::
apply
(
module
&
m
)
const
{
assert
(
ctx
);
const
bool
int8_x4_format
=
get_int8_x4_format
(
any_cast
<
migraphx
::
gpu
::
context
>
(
*
ctx
));
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
!=
"gpu::miopen_op"
)
continue
;
auto
op
=
any_cast
<
miopen_op
>
(
ins
->
get_operator
()).
op
;
std
::
size_t
ws
=
0
;
try
{
// for the regular convolution and convolution_backwards, this try would always succeed
ws
=
compile
(
op
,
ins
,
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
ws
=
compile
(
op
,
ins
,
not
int8_x4_format
);
}
auto
inputs
=
ins
->
inputs
();
auto
alloc
=
m
.
insert_instruction
(
ws
=
compile
(
op
,
ins
);
auto
inputs
=
ins
->
inputs
();
auto
alloc
=
m
.
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
shape
{
shape
::
int8_type
,
{
ws
}})}}));
inputs
.
insert
(
std
::
prev
(
inputs
.
end
()),
alloc
);
...
...
src/targets/gpu/compile_ops.cpp
View file @
0662a9a3
...
...
@@ -168,6 +168,7 @@ struct compile_plan
}
const
compiled_result
&
benchmark
(
problem_cache
&
pc
)
const
{
const
auto
trace_level
=
value_of
(
MIGRAPHX_TRACE_BENCHMARKING
{});
if
(
results
.
empty
())
MIGRAPHX_THROW
(
"No configs to tune"
);
if
(
results
.
size
()
==
1
)
...
...
@@ -178,9 +179,10 @@ struct compile_plan
}
if
(
not
config
)
MIGRAPHX_THROW
(
"Multiple kernels without config"
);
std
::
cout
<<
"Benchmarking "
<<
preop
.
name
()
<<
": "
<<
results
.
size
()
<<
" configs"
<<
std
::
endl
;
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{}))
if
(
trace_level
>
0
)
std
::
cout
<<
"Benchmarking "
<<
preop
.
name
()
<<
": "
<<
results
.
size
()
<<
" configs"
<<
std
::
endl
;
if
(
trace_level
>
1
)
std
::
cout
<<
"Problem: "
<<
config
->
problem
<<
std
::
endl
;
std
::
vector
<
double
>
times
;
times
.
reserve
(
results
.
size
());
...
...
@@ -189,22 +191,23 @@ struct compile_plan
config
->
solutions
.
begin
(),
std
::
back_inserter
(
times
),
[
&
](
const
auto
&
cr
,
const
auto
&
solution
)
{
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
"Benchmarking solution: "
<<
solution
<<
std
::
endl
;
if
(
not
cr
.
has_value
())
{
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
"No binary"
<<
std
::
endl
;
return
std
::
numeric_limits
<
double
>::
max
();
}
auto
t
=
time_op
(
*
ctx
,
cr
->
replace
.
code_object
,
to_shapes
(
cr
->
ins
->
inputs
()),
20
);
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{})
)
if
(
trace_level
>
1
)
std
::
cout
<<
t
<<
"ms"
<<
std
::
endl
;
return
t
;
});
auto
i
=
std
::
distance
(
times
.
begin
(),
std
::
min_element
(
times
.
begin
(),
times
.
end
()));
std
::
cout
<<
"Fastest solution: "
<<
config
->
solutions
.
at
(
i
)
<<
std
::
endl
;
if
(
trace_level
>
0
)
std
::
cout
<<
"Fastest solution: "
<<
config
->
solutions
.
at
(
i
)
<<
std
::
endl
;
pc
.
insert
(
preop
.
name
(),
config
->
problem
,
config
->
solutions
.
at
(
i
));
if
(
not
results
[
i
].
has_value
())
MIGRAPHX_THROW
(
"No valid tuned compilation."
);
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
deleted
100644 → 0
View file @
b74d3a8f
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
comp_shape
=
arg
.
get_shape
();
auto
out_lens
=
comp_shape
.
lens
();
auto
dim_0
=
out_lens
.
size
()
-
2
;
auto
dim_1
=
out_lens
.
size
()
-
1
;
std
::
size_t
lda
=
comp_shape
.
strides
()[
dim_0
];
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
__device__
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_m
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_m
+
(
i_k
/
nb
)
*
lda
)
*
nb
+
offset
]
=
in_ptr
[
i_m
+
i_k
*
lda
+
offset
];
});
});
});
}
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
trans_shape
=
arg
.
get_shape
();
auto
out_lens
=
trans_shape
.
lens
();
auto
dim_0
=
trans_shape
.
lens
().
size
()
-
2
;
auto
dim_1
=
trans_shape
.
lens
().
size
()
-
1
;
std
::
size_t
ldb
=
trans_shape
.
strides
()[
dim_1
];
auto
wrap_lens
=
out_lens
;
std
::
swap
(
wrap_lens
[
dim_0
],
wrap_lens
[
dim_1
]);
shape
comp_shape
{
trans_shape
.
type
(),
wrap_lens
};
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
__device__
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_n
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_n
+
(
i_k
/
nb
)
*
ldb
)
*
nb
+
offset
]
=
in_ptr
[
i_n
+
i_k
*
ldb
+
offset
];
});
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/gemm_impl.cpp
View file @
0662a9a3
This diff is collapsed.
Click to expand it.
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
View file @
0662a9a3
...
...
@@ -42,7 +42,7 @@ struct compile_miopen
context
*
ctx
=
nullptr
;
std
::
string
name
()
const
{
return
"gpu::compile_miopen"
;
}
void
apply
(
module
&
m
)
const
;
std
::
size_t
compile
(
operation
&
op
,
instruction_ref
ins
,
bool
format
)
const
;
std
::
size_t
compile
(
operation
&
op
,
instruction_ref
ins
)
const
;
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
0662a9a3
...
...
@@ -57,7 +57,6 @@ template <class Op>
struct
miopen_convolution
{
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
...
...
@@ -74,7 +73,6 @@ struct miopen_convolution
f
(
self
.
solution_object
,
"solution_object"
),
#endif
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
}
...
...
@@ -94,9 +92,9 @@ struct miopen_convolution
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
())
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
())
,
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
...
...
@@ -162,8 +160,8 @@ struct miopen_convolution
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
])
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
])
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
...
...
@@ -179,13 +177,8 @@ struct miopen_convolution
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
const
auto
&
x_shape
=
inputs
[
0
];
const
auto
&
w_shape
=
inputs
[
1
];
#ifdef MIGRAPHX_HAS_FIND_2_API
{
...
...
@@ -327,8 +320,8 @@ struct miopen_convolution
": workspace has changed during finalization."
);
}
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
])
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
])
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
...
...
@@ -347,21 +340,6 @@ struct miopen_convolution
{
return
shapes
.
size
()
-
1
;
}
inline
shape
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
return
s
;
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
deleted
100644 → 0
View file @
b74d3a8f
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
MIGRAPHX_DEVICE_EXPORT
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
MIGRAPHX_DEVICE_EXPORT
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/gemm.hpp
View file @
0662a9a3
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -40,9 +40,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
struct
context
;
void
blas_shape
(
const
shape
&
s
);
shape
transpose_batch
(
const
shape
&
s
,
unsigned
trans_batch
);
void
blas_shape
(
const
shape
&
s
);
template
<
class
Op
>
struct
rocblas_gemm
...
...
@@ -50,9 +49,9 @@ struct rocblas_gemm
Op
op
;
float
alpha
=
1
;
float
beta
=
0
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
unsigned
trans_batch
=
0
;
int32_t
solution_idx
=
0
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
...
...
@@ -60,9 +59,9 @@ struct rocblas_gemm
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
pack
(
f
(
self
.
alpha
,
"alpha"
),
f
(
self
.
beta
,
"beta"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
compute_fp32
,
"compute_fp32"
),
f
(
self
.
trans_batch
,
"trans_batch"
)));
f
(
self
.
trans_batch
,
"trans_batch"
),
f
(
self
.
solution_idx
,
"solution_idx"
)));
}
std
::
string
name
()
const
...
...
@@ -78,6 +77,8 @@ struct rocblas_gemm
{
std
::
vector
<
shape
>
in_shapes
(
inputs
);
in_shapes
.
pop_back
();
// When input shapes are A, B, C the GEMM equation is C = α AB+ β C where α, β are
// scalars
check_shapes
{
in_shapes
,
*
this
}.
has
(
2
,
3
);
blas_shape
(
inputs
[
0
]);
blas_shape
(
inputs
[
1
]);
...
...
@@ -113,17 +114,12 @@ struct rocblas_gemm
{
if
(
this
->
name
()
==
"gpu::gemm"
)
{
gemm
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
);
gemm
_compute
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
compute_fp32
,
solution_idx
);
}
else
{
gemm
(
ctx
,
output_shape
,
args
,
int32_t
(
alpha
),
int32_t
(
beta
),
int8_x4_format
,
compute_fp32
);
gemm_compute
(
ctx
,
output_shape
,
args
,
int32_t
(
alpha
),
int32_t
(
beta
),
compute_fp32
,
solution_idx
);
}
return
args
.
back
();
}
...
...
@@ -132,6 +128,33 @@ struct rocblas_gemm
{
return
shapes
.
size
()
-
1
;
}
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
input_shapes
)
{
#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API
if
(
enabled
(
MIGRAPHX_ENABLE_GEMM_TUNING
{})
or
ctx
.
get_exhaustive_tune_flag
())
{
if
(
this
->
name
()
==
"gpu::gemm"
)
{
solution_idx
=
gemm_finalize
(
ctx
,
output_shape
,
input_shapes
,
alpha
,
beta
,
compute_fp32
,
solution_idx
);
}
else
{
solution_idx
=
gemm_finalize
(
ctx
,
output_shape
,
input_shapes
,
int32_t
(
alpha
),
int32_t
(
beta
),
compute_fp32
,
solution_idx
);
}
}
#else
// suppress compiler warnings
(
void
)
ctx
,
(
void
)
output_shape
,
(
void
)
input_shapes
;
#endif
}
};
}
// namespace gpu
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment