Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
b3955af4
Commit
b3955af4
authored
Jun 21, 2022
by
Paul
Browse files
Merge
parents
1af49c6f
c0398ded
Changes
32
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
266 additions
and
151 deletions
+266
-151
Jenkinsfile
Jenkinsfile
+1
-1
src/CMakeLists.txt
src/CMakeLists.txt
+2
-0
src/dead_code_elimination.cpp
src/dead_code_elimination.cpp
+4
-2
src/include/migraphx/allocation_model.hpp
src/include/migraphx/allocation_model.hpp
+17
-0
src/include/migraphx/op/allocate.hpp
src/include/migraphx/op/allocate.hpp
+43
-0
src/include/migraphx/replace_allocate.hpp
src/include/migraphx/replace_allocate.hpp
+23
-0
src/instruction.cpp
src/instruction.cpp
+1
-0
src/module.cpp
src/module.cpp
+2
-3
src/replace_allocate.cpp
src/replace_allocate.cpp
+101
-0
src/targets/cpu/include/migraphx/cpu/allocation_model.hpp
src/targets/cpu/include/migraphx/cpu/allocation_model.hpp
+1
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+1
-24
src/targets/cpu/target.cpp
src/targets/cpu/target.cpp
+3
-0
src/targets/gpu/compile_gen.cpp
src/targets/gpu/compile_gen.cpp
+1
-1
src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
+1
-0
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+5
-4
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+4
-1
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+33
-108
src/targets/gpu/pack_int8_args.cpp
src/targets/gpu/pack_int8_args.cpp
+7
-1
src/targets/gpu/quant_convolution.cpp
src/targets/gpu/quant_convolution.cpp
+13
-6
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+3
-0
No files found.
Jenkinsfile
View file @
b3955af4
...
...
@@ -37,7 +37,7 @@ def rocmtestnode(Map conf) {
stage
(
"checkout ${variant}"
)
{
checkout
scm
}
gitStatusWrapper
(
credentialsId:
'7126e5fe-eb51-4576-b52b-9aaf1de8f0fd'
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCmSoftwarePlatform'
,
repo:
'AMDMIGraphX'
)
{
gitStatusWrapper
(
credentialsId:
"${env.status_wrapper_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCmSoftwarePlatform'
,
repo:
'AMDMIGraphX'
)
{
pre
()
stage
(
"image ${variant}"
)
{
try
{
...
...
src/CMakeLists.txt
View file @
b3955af4
...
...
@@ -54,6 +54,7 @@ add_library(migraphx
reduce_dims.cpp
register_op.cpp
register_target.cpp
replace_allocate.cpp
simplify_qdq.cpp
rewrite_batchnorm.cpp
rewrite_pooling.cpp
...
...
@@ -80,6 +81,7 @@ register_migraphx_ops(
acosh
acos
add
allocate
argmax
argmin
asinh
...
...
src/dead_code_elimination.cpp
View file @
b3955af4
...
...
@@ -4,6 +4,7 @@
#include <migraphx/iterator_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
#include <unordered_set>
namespace
migraphx
{
...
...
@@ -24,9 +25,10 @@ void dead_code_elimination::apply(module& m) const
// Skip the last instruction
if
(
i
==
last
)
break
;
// Skip instruction with empty shape as output unless its a builtin or undefined or identity
// Skip instruction with empty shape as output unless its a builtin, undefined, identity, or
// allocate
if
(
i
->
get_shape
().
elements
()
==
0
and
i
->
name
().
front
()
!=
'@'
and
i
->
name
()
!=
"undefined"
and
i
->
name
()
!=
"identity"
)
not
contains
({
"undefined"
,
"identity"
,
"allocate"
},
i
->
name
())
)
continue
;
assert
(
std
::
distance
(
m
.
begin
(),
i
)
<=
std
::
distance
(
m
.
begin
(),
last
));
std
::
unordered_set
<
instruction_ref
>
visited
;
...
...
src/include/migraphx/allocation_model.hpp
View file @
b3955af4
...
...
@@ -28,6 +28,8 @@ struct allocation_model
operation
allocate
(
const
shape
&
s
)
const
;
/// Create a preallocated operator for the given shape
operation
preallocate
(
const
shape
&
s
,
const
std
::
string
&
id
)
const
;
/// Check if outputs are to be inserted
bool
needs_out_params
()
const
;
};
#else
...
...
@@ -45,6 +47,8 @@ struct allocation_model
operation
allocate
(
const
shape
&
s
)
const
;
//
operation
preallocate
(
const
shape
&
s
,
std
::
string
id
)
const
;
//
bool
needs_out_params
()
const
;
};
#else
...
...
@@ -136,6 +140,12 @@ struct allocation_model
return
(
*
this
).
private_detail_te_get_handle
().
preallocate
(
s
,
std
::
move
(
id
));
}
bool
needs_out_params
()
const
{
assert
((
*
this
).
private_detail_te_handle_mem_var
);
return
(
*
this
).
private_detail_te_get_handle
().
needs_out_params
();
}
friend
bool
is_shared
(
const
allocation_model
&
private_detail_x
,
const
allocation_model
&
private_detail_y
)
{
...
...
@@ -154,6 +164,7 @@ struct allocation_model
virtual
std
::
string
copy
()
const
=
0
;
virtual
operation
allocate
(
const
shape
&
s
)
const
=
0
;
virtual
operation
preallocate
(
const
shape
&
s
,
std
::
string
id
)
const
=
0
;
virtual
bool
needs_out_params
()
const
=
0
;
};
template
<
typename
PrivateDetailTypeErasedT
>
...
...
@@ -200,6 +211,12 @@ struct allocation_model
return
private_detail_te_value
.
preallocate
(
s
,
std
::
move
(
id
));
}
bool
needs_out_params
()
const
override
{
return
private_detail_te_value
.
needs_out_params
();
}
PrivateDetailTypeErasedT
private_detail_te_value
;
};
...
...
src/include/migraphx/op/allocate.hpp
0 → 100644
View file @
b3955af4
#ifndef MIGRAPHX_GUARD_OPERATORS_ALLOCATE_HPP
#define MIGRAPHX_GUARD_OPERATORS_ALLOCATE_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
#include <migraphx/shape.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
struct
allocate
{
shape
s
{};
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
s
,
"shape"
));
}
std
::
string
name
()
const
{
return
"allocate"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
migraphx
::
check_shapes
{
inputs
,
*
this
}.
has
(
0
);
return
s
;
}
argument
compute
(
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
)
const
{
return
{
output_shape
};
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/replace_allocate.hpp
0 → 100644
View file @
b3955af4
#ifndef MIGRAPHX_GUARD_RTGLIB_REPLACE_ALLOCATE_HPP
#define MIGRAPHX_GUARD_RTGLIB_REPLACE_ALLOCATE_HPP
#include <migraphx/config.hpp>
#include <migraphx/allocation_model.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
struct
replace_allocate
{
allocation_model
model
;
bool
offload_copy
=
false
;
std
::
string
name
()
const
{
return
"replace_allocate"
;
}
void
apply
(
module
&
m
)
const
;
};
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/instruction.cpp
View file @
b3955af4
...
...
@@ -81,6 +81,7 @@ bool instruction::valid(instruction_ref start, bool check_order) const
bool
ret
=
self
!=
i
->
outputs
().
end
();
if
(
check_order
)
{
// check arguments for this instruction before this instruction
ret
=
ret
and
(
std
::
distance
(
start
,
i
)
<
std
::
distance
(
start
,
*
self
));
}
return
ret
;
...
...
src/module.cpp
View file @
b3955af4
...
...
@@ -510,9 +510,8 @@ instruction_ref module::validate() const
return
std
::
find_if
(
impl
->
instructions
.
begin
(),
impl
->
instructions
.
end
(),
[
&
](
const
instruction
&
i
)
{
auto
inputs
=
i
.
inputs
();
bool
check_order
=
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
auto
in
)
{
return
contains
(
impl
->
instructions
,
*
in
);
});
bool
check_order
=
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
auto
in
)
{
return
has_instruction
(
in
);
});
return
!
i
.
valid
(
impl
->
instructions
.
begin
(),
check_order
);
});
}
...
...
src/replace_allocate.cpp
0 → 100644
View file @
b3955af4
#include <migraphx/replace_allocate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/op/allocate.hpp>
#include <map>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
std
::
unordered_map
<
instruction_ref
,
std
::
string
>
create_output_names
(
const
module
&
mod
)
{
std
::
unordered_map
<
instruction_ref
,
std
::
string
>
mod_output_names
{};
auto
last
=
std
::
prev
(
mod
.
end
());
if
(
last
->
name
()
==
"@return"
)
{
const
auto
&
prog_outputs
=
last
->
inputs
();
std
::
vector
<
instruction_ref
>
outputs_alias
(
prog_outputs
.
size
());
std
::
transform
(
prog_outputs
.
begin
(),
prog_outputs
.
end
(),
outputs_alias
.
begin
(),
[](
const
auto
&
i
)
{
return
instruction
::
get_output_alias
(
i
);
});
std
::
size_t
index
=
0
;
for
(
auto
ins
:
outputs_alias
)
{
mod_output_names
[
ins
]
=
mod
.
name
()
+
":#output_"
+
std
::
to_string
(
index
++
);
}
}
else
{
auto
ins
=
instruction
::
get_output_alias
(
last
);
mod_output_names
[
ins
]
=
"output"
;
}
return
mod_output_names
;
}
void
insert_submod_allocations
(
instruction_ref
ins
,
module
&
mod
,
const
allocation_model
&
model
)
{
std
::
vector
<
instruction_ref
>
inputs
=
ins
->
inputs
();
std
::
vector
<
module_ref
>
mod_args
=
ins
->
module_inputs
();
std
::
map
<
std
::
string
,
shape
>
name_shapes
;
for
(
const
auto
&
smod
:
mod_args
)
{
auto
ps
=
smod
->
get_parameter_shapes
();
name_shapes
.
insert
(
ps
.
begin
(),
ps
.
end
());
}
for
(
auto
&
pn
:
name_shapes
)
{
const
auto
&
s
=
pn
.
second
;
instruction_ref
output
{};
output
=
mod
.
insert_instruction
(
ins
,
model
.
allocate
(
s
));
inputs
.
push_back
(
output
);
}
mod
.
replace_instruction
(
ins
,
ins
->
get_operator
(),
inputs
,
mod_args
);
}
void
replace_allocate
::
apply
(
module
&
m
)
const
{
auto
mod_output_names
=
create_output_names
(
m
);
bool
main_offload_copy
=
m
.
name
()
==
"main"
?
this
->
offload_copy
:
false
;
for
(
auto
ins
:
iterator_for
(
m
))
{
auto
op
=
ins
->
get_operator
();
auto
op_name
=
op
.
name
();
// check if allocations from submodules need to be inserted
// for now, only the "if" operator is affected
if
(
op_name
==
"if"
)
{
insert_submod_allocations
(
ins
,
m
,
model
);
continue
;
}
if
(
op_name
!=
"allocate"
)
continue
;
auto
s
=
ins
->
get_shape
();
if
(
not
main_offload_copy
and
model
.
needs_out_params
()
and
contains
(
mod_output_names
,
ins
))
{
auto
out_param
=
m
.
add_parameter
(
mod_output_names
[
ins
],
s
);
m
.
replace_instruction
(
ins
,
out_param
);
continue
;
}
m
.
replace_instruction
(
ins
,
m
.
insert_instruction
(
ins
,
make_op
(
model
.
name
(),
migraphx
::
value
{{
"shape"
,
to_value
(
s
)}})));
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/cpu/include/migraphx/cpu/allocation_model.hpp
100755 → 100644
View file @
b3955af4
...
...
@@ -15,6 +15,7 @@ struct cpu_allocation_model
std
::
string
copy
()
const
;
operation
allocate
(
const
shape
&
s
)
const
;
operation
preallocate
(
const
shape
&
s
,
const
std
::
string
&
id
)
const
;
bool
needs_out_params
()
const
{
return
false
;
}
};
}
// namespace cpu
...
...
src/targets/cpu/lowering.cpp
View file @
b3955af4
...
...
@@ -291,30 +291,8 @@ struct cpu_apply
{
module
*
modl
;
std
::
unordered_map
<
std
::
string
,
std
::
function
<
instruction_ref
(
instruction_ref
)
>>
apply_map
{};
std
::
unordered_map
<
instruction_ref
,
std
::
string
>
prog_output_names
{};
instruction_ref
last
{};
void
create_output_names
()
{
this
->
last
=
instruction
::
get_output_alias
(
std
::
prev
(
modl
->
end
()));
if
(
this
->
last
->
name
()
==
"@return"
)
{
const
auto
&
prog_outputs
=
last
->
inputs
();
std
::
vector
<
instruction_ref
>
outputs_alias
(
prog_outputs
.
size
());
std
::
transform
(
prog_outputs
.
begin
(),
prog_outputs
.
end
(),
outputs_alias
.
begin
(),
[](
const
auto
&
i
)
{
return
instruction
::
get_output_alias
(
i
);
});
std
::
size_t
index
=
0
;
for
(
auto
ins
:
outputs_alias
)
{
prog_output_names
[
ins
]
=
modl
->
name
()
+
":#output_"
+
std
::
to_string
(
index
++
);
}
}
}
void
extend_op
(
const
std
::
string
&
op_name
,
const
std
::
string
&
cpu_name
,
bool
allocate
=
true
)
{
apply_map
.
emplace
(
op_name
,
[
=
](
instruction_ref
ins
)
{
...
...
@@ -360,7 +338,6 @@ struct cpu_apply
void
init
()
{
create_output_names
();
extend_dnnl_algos
(
"dnnl::binary"
,
{
{
"add"
,
"binary_add"
},
...
...
@@ -490,7 +467,7 @@ struct cpu_apply
instruction_ref
insert_allocation
(
instruction_ref
ins
,
const
shape
&
s
)
const
{
return
modl
->
insert_instruction
(
ins
,
make_op
(
"
cpu::
allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
return
modl
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
};
...
...
src/targets/cpu/target.cpp
View file @
b3955af4
...
...
@@ -13,6 +13,7 @@
#include <migraphx/memory_coloring.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
...
...
@@ -70,6 +71,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
lowering
{},
eliminate_contiguous
{
"dnnl::reorder"
},
dead_code_elimination
{},
replace_allocate
{
cpu_allocation_model
{}},
dead_code_elimination
{},
adjust_allocation
{
cpu_allocation_model
{}},
dead_code_elimination
{},
fuse_ops
{
&
ctx
},
...
...
src/targets/gpu/compile_gen.cpp
View file @
b3955af4
...
...
@@ -10,7 +10,7 @@ namespace gen {
static
std
::
vector
<
std
::
size_t
>
vector_sizes
(
const
std
::
vector
<
shape
>&
inputs
)
{
// If all inputs
is
half then only use half2
// If all inputs
are
half then only use half2
if
(
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[](
const
auto
&
s
)
{
return
s
.
type
()
==
shape
::
half_type
;
}))
...
...
src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
View file @
b3955af4
...
...
@@ -16,6 +16,7 @@ struct gpu_allocation_model
std
::
string
copy
()
const
;
operation
allocate
(
const
shape
&
s
)
const
;
operation
preallocate
(
const
shape
&
s
,
const
std
::
string
&
id
)
const
;
bool
needs_out_params
()
const
{
return
true
;
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
b3955af4
...
...
@@ -11,9 +11,10 @@
#include <sstream>
#ifdef HAS_FIND_MODE_API
extern
"C"
miopenStatus_t
miopenHiddenSetConvolutionFindMode
(
miopenConvolutionDescriptor_t
convDesc
,
int
findMode
);
#ifdef MIGRAPHX_HAS_FIND_MODE_API
extern
"C"
miopenStatus_t
miopenHiddenSetConvolutionFindMode
(
miopenConvolutionDescriptor_t
convDesc
,
// NOLINT
int
findMode
);
// NOLINT
#endif
namespace
migraphx
{
...
...
@@ -104,7 +105,7 @@ inline convolution_descriptor make_conv(const T& op)
c
.
get
(),
padding
.
size
(),
padding
.
data
(),
stride
.
data
(),
dilation
.
data
(),
c_mode
);
if
(
op
.
group
>
1
)
miopenSetConvolutionGroupCount
(
c
.
get
(),
op
.
group
);
#ifdef HAS_FIND_MODE_API
#ifdef
MIGRAPHX_
HAS_FIND_MODE_API
miopenHiddenSetConvolutionFindMode
(
c
.
get
(),
1
);
// Normal mode
#endif
return
c
;
...
...
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
View file @
b3955af4
...
...
@@ -2,6 +2,7 @@
#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/gpu/miopen.hpp>
...
...
@@ -14,6 +15,7 @@ struct context;
struct
miopen_quant_convolution
{
op
::
quant_convolution
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
;
miopenConvFwdAlgorithm_t
algo
{};
miopenHandle_t
handle
=
nullptr
;
...
...
@@ -22,7 +24,8 @@ struct miopen_quant_convolution
static
auto
reflect
(
Self
&
self
,
F
f
)
{
// TODO: Add algo
return
op
::
quant_convolution
::
reflect
(
self
.
op
,
f
);
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
pack
(
f
(
self
.
int8_x4_format
,
"int8_x4_format"
)));
}
std
::
string
name
()
const
{
return
"gpu::quant_convolution"
;
}
...
...
src/targets/gpu/lowering.cpp
View file @
b3955af4
...
...
@@ -58,7 +58,6 @@ struct miopen_apply
const
lowering
*
pass
=
nullptr
;
std
::
unordered_map
<
std
::
string
,
std
::
function
<
instruction_ref
(
instruction_ref
)
>>
apply_map
{};
instruction_ref
last
{};
std
::
unordered_map
<
instruction_ref
,
std
::
string
>
prog_output_names
{};
bool
offload_copy
=
false
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
...
...
@@ -77,27 +76,6 @@ struct miopen_apply
(
void
)
i
;
}
void
create_output_names
()
{
this
->
last
=
instruction
::
get_output_alias
(
std
::
prev
(
mod
->
end
()));
if
(
this
->
last
->
name
()
==
"@return"
)
{
const
auto
&
prog_outputs
=
last
->
inputs
();
std
::
vector
<
instruction_ref
>
outputs_alias
(
prog_outputs
.
size
());
std
::
transform
(
prog_outputs
.
begin
(),
prog_outputs
.
end
(),
outputs_alias
.
begin
(),
[](
const
auto
&
i
)
{
return
instruction
::
get_output_alias
(
i
);
});
std
::
size_t
index
=
0
;
for
(
auto
ins
:
outputs_alias
)
{
prog_output_names
[
ins
]
=
mod
->
name
()
+
":#output_"
+
std
::
to_string
(
index
++
);
}
}
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
...
...
@@ -120,7 +98,6 @@ struct miopen_apply
#endif
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
create_output_names
();
add_generic_op
(
"acos"
);
add_generic_op
(
"acosh"
);
...
...
@@ -200,7 +177,7 @@ struct miopen_apply
add_quant_convolution_op
();
}
void
copy_params
()
void
copy_params
()
const
{
if
(
not
offload_copy
)
return
;
...
...
@@ -260,7 +237,7 @@ struct miopen_apply
copy_params
();
}
instruction_ref
insert_precompile_op
(
instruction_ref
ins
)
instruction_ref
insert_precompile_op
(
instruction_ref
ins
)
const
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
std
::
vector
<
instruction_ref
>
refs
=
ins
->
inputs
();
...
...
@@ -273,28 +250,9 @@ struct miopen_apply
ins
->
module_inputs
());
}
instruction_ref
insert_allocation
(
instruction_ref
ins
,
const
shape
&
s
,
std
::
string
tag
=
""
)
instruction_ref
insert_allocation
(
instruction_ref
ins
,
const
shape
&
s
)
const
{
// Instruction's output is an input of the ret instruction
if
(
offload_copy
)
{
auto
result
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
s
)},
{
"tag"
,
std
::
move
(
tag
)}}));
return
result
;
}
auto
ins_alias
=
instruction
::
get_output_alias
(
ins
);
if
(
last
->
name
()
==
"@return"
and
tag
.
empty
()
and
prog_output_names
.
count
(
ins_alias
)
>
0
)
{
return
mod
->
add_parameter
(
prog_output_names
[
ins_alias
],
s
);
}
else
if
(
ins
==
last
and
tag
.
empty
())
{
return
mod
->
add_parameter
(
"output"
,
s
);
}
return
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
s
)},
{
"tag"
,
std
::
move
(
tag
)}}));
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
void
add_convolution_op
()
...
...
@@ -305,7 +263,7 @@ struct miopen_apply
auto
conv
=
miopen_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
,
"workspace"
);
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
...
...
@@ -321,7 +279,7 @@ struct miopen_apply
auto
conv
=
miopen_deconvolution
{
op
,
make_deconv
(
op
)};
auto
ws
=
conv
.
compile
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
,
"workspace"
);
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
...
...
@@ -334,27 +292,9 @@ struct miopen_apply
{
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
std
::
vector
<
instruction_ref
>
refs
=
ins
->
inputs
();
if
(
refs
.
size
()
==
2
)
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
refs
.
push_back
(
output
);
}
else
{
auto
c_alias
=
instruction
::
get_output_alias
(
refs
.
back
());
if
(
ins
==
last
or
refs
.
back
()
->
outputs
().
size
()
>
1
or
c_alias
->
inputs
().
empty
())
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
auto
copy_out
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::copy"
),
refs
.
back
(),
output
);
refs
.
back
()
=
copy_out
;
refs
.
push_back
(
copy_out
);
}
else
{
refs
.
push_back
(
refs
.
back
());
}
}
assert
(
refs
.
size
()
==
2
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
refs
.
push_back
(
output
);
return
mod
->
replace_instruction
(
ins
,
rocblas_gemm
<
Op
>
{
Op
{},
1
,
0
,
int8_x4_format
,
compute_fp32
},
refs
);
});
...
...
@@ -364,11 +304,25 @@ struct miopen_apply
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_quant_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
compile
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
shape
ws
;
miopen_quant_convolution
conv
;
auto
compile_quant_conv_with_format
=
[
&
](
bool
format
)
{
conv
=
miopen_quant_convolution
{
op
,
format
,
make_conv
(
op
)};
ws
=
conv
.
compile
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
};
try
{
compile_quant_conv_with_format
(
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
compile_quant_conv_with_format
(
!
int8_x4_format
);
}
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
,
"workspace"
);
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
...
...
@@ -465,33 +419,7 @@ struct miopen_apply
auto
sync_cond
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::sync_stream"
),
cpu_cond
);
inputs
.
front
()
=
sync_cond
;
std
::
vector
<
module_ref
>
mod_args
=
ins
->
module_inputs
();
std
::
map
<
std
::
string
,
shape
>
name_shapes
;
for
(
const
auto
&
smod
:
mod_args
)
{
auto
ps
=
smod
->
get_parameter_shapes
();
name_shapes
.
insert
(
ps
.
begin
(),
ps
.
end
());
}
bool
ins_output_allocated
=
false
;
for
(
auto
&
pn
:
name_shapes
)
{
const
auto
&
s
=
pn
.
second
;
instruction_ref
output
{};
if
(
s
==
ins
->
get_shape
()
and
not
ins_output_allocated
)
{
output
=
insert_allocation
(
ins
,
s
);
ins_output_allocated
=
true
;
}
else
{
output
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
inputs
.
push_back
(
output
);
}
return
mod
->
replace_instruction
(
ins
,
ins
->
get_operator
(),
inputs
,
mod_args
);
return
mod
->
replace_instruction
(
ins
,
ins
->
get_operator
(),
inputs
,
ins
->
module_inputs
());
});
}
...
...
@@ -510,20 +438,17 @@ struct miopen_apply
inputs
.
at
(
0
)
=
synced_max_iter
;
inputs
.
at
(
1
)
=
cpu_cond
;
auto
copy_inputs
=
inputs
;
std
::
transform
(
copy_inputs
.
begin
(),
copy_inputs
.
end
(),
std
::
back_inserter
(
inputs
),
[
&
](
auto
in
)
{
return
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
in
->
get_shape
())}}));
});
std
::
transform
(
copy_inputs
.
begin
(),
copy_inputs
.
end
(),
std
::
back_inserter
(
inputs
),
[
&
](
auto
in
)
{
return
insert_allocation
(
ins
,
in
->
get_shape
());
});
auto
mod_args
=
ins
->
module_inputs
();
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
const
auto
*
sub_mod
=
mod_args
.
front
();
auto
cond_out
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
sub_mod
->
get_output_shapes
().
front
())}}));
auto
cond_out
=
insert_allocation
(
ins
,
sub_mod
->
get_output_shapes
().
front
());
// add cond and mod outputs to the argument list
inputs
.
push_back
(
cond_out
);
inputs
.
push_back
(
output
);
...
...
src/targets/gpu/pack_int8_args.cpp
View file @
b3955af4
...
...
@@ -118,7 +118,7 @@ void pack_int8_args::apply(module& m) const
assert
(
val
.
contains
(
"int8_x4_format"
));
if
(
not
val
.
at
(
"int8_x4_format"
).
to
<
bool
>
())
{
return
;
continue
;
}
auto
inputs
=
ins
->
inputs
();
auto
lens
=
inputs
.
at
(
0
)
->
get_shape
().
lens
();
...
...
@@ -156,6 +156,12 @@ void pack_int8_args::apply(module& m) const
}
else
if
(
ins
->
name
()
==
"gpu::quant_convolution"
)
{
auto
val
=
ins
->
get_operator
().
to_value
();
if
(
not
val
.
at
(
"int8_x4_format"
).
to
<
bool
>
())
{
continue
;
}
auto
inputs
=
ins
->
inputs
();
auto
packed_x
=
m
.
insert_instruction
(
ins
,
...
...
src/targets/gpu/quant_convolution.cpp
View file @
b3955af4
...
...
@@ -16,8 +16,8 @@ argument miopen_quant_convolution::compute(context& ctx,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
(),
true
);
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
(),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
(),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
output_shape
);
float
alpha
=
1
;
...
...
@@ -49,8 +49,8 @@ shape miopen_quant_convolution::compile(context& ctx,
std
::
vector
<
shape
>
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
inputs
[
0
],
true
);
auto
w_desc
=
make_tensor
(
inputs
[
1
],
true
);
auto
x_desc
=
make_tensor
(
inputs
[
0
],
int8_x4_format
);
auto
w_desc
=
make_tensor
(
inputs
[
1
],
int8_x4_format
);
auto
y_desc
=
make_tensor
(
output_shape
);
std
::
size_t
workspace_size
=
0
;
...
...
@@ -62,8 +62,15 @@ shape miopen_quant_convolution::compile(context& ctx,
&
workspace_size
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
arg_vec4_x
=
to_gpu
(
generate_argument
(
pack_int8_shape
(
inputs
[
0
])));
auto
arg_vec4_w
=
to_gpu
(
generate_argument
(
pack_int8_shape
(
inputs
[
1
])));
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
auto
arg_vec4_x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
arg_vec4_w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
...
...
src/targets/gpu/target.cpp
View file @
b3955af4
...
...
@@ -17,6 +17,7 @@
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
...
...
@@ -109,6 +110,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
lowering
{
&
ctx
,
options
.
offload_copy
},
eliminate_contiguous
{
"gpu::contiguous"
},
dead_code_elimination
{},
replace_allocate
{
gpu_allocation_model
{},
options
.
offload_copy
},
dead_code_elimination
{},
eliminate_concat
{
concat_gpu_optimization
{}},
dead_code_elimination
{},
pack_int8_args
{},
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment