Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
22bb777f
Unverified
Commit
22bb777f
authored
Oct 30, 2023
by
Umang Yadav
Committed by
GitHub
Oct 30, 2023
Browse files
Remove int8x4 format completely (#2373)
parent
409fd18c
Changes
25
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
29 additions
and
801 deletions
+29
-801
examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp
...raphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp
+2
-16
examples/migraphx/migraphx_driver/README.md
examples/migraphx/migraphx_driver/README.md
+0
-3
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+0
-8
src/targets/gpu/compile_miopen.cpp
src/targets/gpu/compile_miopen.cpp
+4
-15
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+0
-97
src/targets/gpu/gemm_impl.cpp
src/targets/gpu/gemm_impl.cpp
+2
-14
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+9
-31
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
...argets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+0
-49
src/targets/gpu/include/migraphx/gpu/gemm.hpp
src/targets/gpu/include/migraphx/gpu/gemm.hpp
+2
-10
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp
+0
-2
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+0
-52
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+0
-63
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+2
-16
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+0
-46
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
+0
-2
src/targets/gpu/int8_conv_pack.cpp
src/targets/gpu/int8_conv_pack.cpp
+0
-78
src/targets/gpu/int8_gemm_pack.cpp
src/targets/gpu/int8_gemm_pack.cpp
+0
-60
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+7
-13
src/targets/gpu/pack_int8_args.cpp
src/targets/gpu/pack_int8_args.cpp
+0
-225
No files found.
examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp
View file @
22bb777f
...
...
@@ -32,7 +32,7 @@
#define MIGRAPHX_MIOPEN_ASSERT(x) (assert((x) == miopenStatusSuccess))
#define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess))
inline
miopenTensorDescriptor_t
make_miopen_tensor
(
const
migraphx
::
shape
&
s
,
bool
pack
=
false
)
inline
miopenTensorDescriptor_t
make_miopen_tensor
(
const
migraphx
::
shape
&
s
)
{
miopenTensorDescriptor_t
t
;
MIGRAPHX_MIOPEN_ASSERT
(
miopenCreateTensorDescriptor
(
&
t
));
...
...
@@ -49,23 +49,9 @@ inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, boo
else
if
(
s
.
type
()
==
migraphx_shape_int32_type
)
d
=
miopenInt32
;
else
if
(
s
.
type
()
==
migraphx_shape_int8_type
)
{
if
(
pack
)
{
// update the lens and corresponding strides
d
=
miopenInt8x4
;
lens
[
1
]
=
((
lens
[
1
]
+
3
)
/
4
)
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
}
else
{
d
=
miopenInt8
;
}
}
else
{
throw
(
"MAKE_TENSOR: unsupported type"
);
}
miopenSetTensorDescriptor
(
t
,
d
,
s_lens
.
size
(),
lens
.
data
(),
strides
.
data
());
return
t
;
}
...
...
examples/migraphx/migraphx_driver/README.md
View file @
22bb777f
...
...
@@ -149,9 +149,6 @@ gpu::gelu
gpu::gelu_new
gpu::gemm
gpu::greater
gpu::int8_conv_pack
gpu::int8_gemm_pack_a
gpu::int8_gemm_pack_b
gpu::layernorm
gpu::leaky_relu
gpu::less
...
...
src/targets/gpu/CMakeLists.txt
View file @
22bb777f
...
...
@@ -128,8 +128,6 @@ add_library(migraphx_gpu
gather.cpp
gemm_impl.cpp
hip.cpp
int8_conv_pack.cpp
int8_gemm_pack.cpp
kernel.cpp
lowering.cpp
logsoftmax.cpp
...
...
@@ -140,7 +138,6 @@ add_library(migraphx_gpu
no_device.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
perfdb.cpp
...
...
@@ -184,7 +181,6 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops
(
miopen_
abs
contiguous
int8_conv_pack
lrn
pooling
)
...
...
@@ -192,10 +188,6 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
...
...
src/targets/gpu/compile_miopen.cpp
View file @
22bb777f
...
...
@@ -60,9 +60,8 @@ struct miopen_op
};
MIGRAPHX_REGISTER_OP
(
miopen_op
);
std
::
size_t
compile_miopen
::
compile
(
operation
&
op
,
instruction_ref
ins
,
bool
format
)
const
std
::
size_t
compile_miopen
::
compile
(
operation
&
op
,
instruction_ref
ins
)
const
{
op
.
from_value
({{
"int8_x4_format"
,
format
}});
auto
v
=
op
.
compile
(
*
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
return
v
.
get
<
std
::
size_t
>
(
"workspace"
,
0
);
}
...
...
@@ -70,23 +69,13 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
void
compile_miopen
::
apply
(
module
&
m
)
const
{
assert
(
ctx
);
const
bool
int8_x4_format
=
get_int8_x4_format
(
any_cast
<
migraphx
::
gpu
::
context
>
(
*
ctx
));
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
!=
"gpu::miopen_op"
)
continue
;
auto
op
=
any_cast
<
miopen_op
>
(
ins
->
get_operator
()).
op
;
std
::
size_t
ws
=
0
;
try
{
// for the regular convolution and convolution_backwards, this try would always succeed
ws
=
compile
(
op
,
ins
,
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
ws
=
compile
(
op
,
ins
,
not
int8_x4_format
);
}
ws
=
compile
(
op
,
ins
);
auto
inputs
=
ins
->
inputs
();
auto
alloc
=
m
.
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
shape
{
shape
::
int8_type
,
{
ws
}})}}));
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
comp_shape
=
arg
.
get_shape
();
auto
out_lens
=
comp_shape
.
lens
();
auto
dim_0
=
out_lens
.
size
()
-
2
;
auto
dim_1
=
out_lens
.
size
()
-
1
;
std
::
size_t
lda
=
comp_shape
.
strides
()[
dim_0
];
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
__device__
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_m
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_m
+
(
i_k
/
nb
)
*
lda
)
*
nb
+
offset
]
=
in_ptr
[
i_m
+
i_k
*
lda
+
offset
];
});
});
});
}
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
trans_shape
=
arg
.
get_shape
();
auto
out_lens
=
trans_shape
.
lens
();
auto
dim_0
=
trans_shape
.
lens
().
size
()
-
2
;
auto
dim_1
=
trans_shape
.
lens
().
size
()
-
1
;
std
::
size_t
ldb
=
trans_shape
.
strides
()[
dim_1
];
auto
wrap_lens
=
out_lens
;
std
::
swap
(
wrap_lens
[
dim_0
],
wrap_lens
[
dim_1
]);
shape
comp_shape
{
trans_shape
.
type
(),
wrap_lens
};
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
__device__
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_n
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_n
+
(
i_k
/
nb
)
*
ldb
)
*
nb
+
offset
]
=
in_ptr
[
i_n
+
i_k
*
ldb
+
offset
];
});
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/gemm_impl.cpp
View file @
22bb777f
...
...
@@ -108,7 +108,6 @@ void gemm_impl(context& ctx,
const
std
::
vector
<
argument
>&
args
,
T
alpha
,
T
beta
,
bool
int8_x4_format
,
bool
compute_fp32
)
{
const
bool
is_3inputs
=
(
args
.
size
()
==
4
);
...
...
@@ -141,11 +140,6 @@ void gemm_impl(context& ctx,
}
rocblas_gemm_flags
flag
=
rocblas_gemm_flags_none
;
#if ROCBLAS_VERSION_MAJOR < 3
if
(
int8_x4_format
)
flag
=
rocblas_gemm_flags_pack_int8x4
;
#endif
auto
a_lens
=
args
[
0
].
get_shape
().
lens
();
auto
b_lens
=
args
[
1
].
get_shape
().
lens
();
output_shape
.
visit_type
([
&
](
auto
as
)
{
...
...
@@ -167,10 +161,6 @@ void gemm_impl(context& ctx,
rocblas_int
n
=
out_lens
[
dim_1
];
rocblas_int
k
=
args
[
0
].
get_shape
().
lens
()[
dim_1
];
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
as
.
from
(
arg
.
data
());
};
if
(
args
[
0
].
get_shape
().
type
()
==
shape
::
int8_type
and
(
k
%
4
)
!=
0
and
int8_x4_format
)
{
MIGRAPHX_THROW
(
"ROCBLAS_GEMM: k size of int8 type input must be mutlple of 4!"
);
}
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
...
...
@@ -256,10 +246,9 @@ void gemm(context& ctx,
const
std
::
vector
<
argument
>&
args
,
float
alpha
,
float
beta
,
bool
int8_x4_format
,
bool
compute_fp32
)
{
gemm_impl
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
);
gemm_impl
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
compute_fp32
);
}
void
gemm
(
context
&
ctx
,
...
...
@@ -267,10 +256,9 @@ void gemm(context& ctx,
const
std
::
vector
<
argument
>&
args
,
int32_t
alpha
,
int32_t
beta
,
bool
int8_x4_format
,
bool
compute_fp32
)
{
gemm_impl
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
);
gemm_impl
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
compute_fp32
);
}
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
View file @
22bb777f
...
...
@@ -42,7 +42,7 @@ struct compile_miopen
context
*
ctx
=
nullptr
;
std
::
string
name
()
const
{
return
"gpu::compile_miopen"
;
}
void
apply
(
module
&
m
)
const
;
std
::
size_t
compile
(
operation
&
op
,
instruction_ref
ins
,
bool
format
)
const
;
std
::
size_t
compile
(
operation
&
op
,
instruction_ref
ins
)
const
;
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
22bb777f
...
...
@@ -57,7 +57,6 @@ template <class Op>
struct
miopen_convolution
{
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
...
...
@@ -74,7 +73,6 @@ struct miopen_convolution
f
(
self
.
solution_object
,
"solution_object"
),
#endif
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
}
...
...
@@ -94,8 +92,8 @@ struct miopen_convolution
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
())
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
())
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
...
...
@@ -162,8 +160,8 @@ struct miopen_convolution
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
])
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
])
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
...
...
@@ -179,13 +177,8 @@ struct miopen_convolution
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
const
auto
&
x_shape
=
inputs
[
0
];
const
auto
&
w_shape
=
inputs
[
1
];
#ifdef MIGRAPHX_HAS_FIND_2_API
{
...
...
@@ -327,8 +320,8 @@ struct miopen_convolution
": workspace has changed during finalization."
);
}
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
])
,
int8_x4_format
);
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
])
,
int8_x4_format
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
...
...
@@ -347,21 +340,6 @@ struct miopen_convolution
{
return
shapes
.
size
()
-
1
;
}
inline
shape
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
return
s
;
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
MIGRAPHX_DEVICE_EXPORT
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
MIGRAPHX_DEVICE_EXPORT
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/gemm.hpp
View file @
22bb777f
...
...
@@ -50,7 +50,6 @@ struct rocblas_gemm
Op
op
;
float
alpha
=
1
;
float
beta
=
0
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
unsigned
trans_batch
=
0
;
...
...
@@ -60,7 +59,6 @@ struct rocblas_gemm
return
pack_join
(
migraphx
::
reflect
(
self
.
op
,
f
),
pack
(
f
(
self
.
alpha
,
"alpha"
),
f
(
self
.
beta
,
"beta"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
compute_fp32
,
"compute_fp32"
),
f
(
self
.
trans_batch
,
"trans_batch"
)));
}
...
...
@@ -113,17 +111,11 @@ struct rocblas_gemm
{
if
(
this
->
name
()
==
"gpu::gemm"
)
{
gemm
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
int8_x4_format
,
compute_fp32
);
gemm
(
ctx
,
output_shape
,
args
,
alpha
,
beta
,
compute_fp32
);
}
else
{
gemm
(
ctx
,
output_shape
,
args
,
int32_t
(
alpha
),
int32_t
(
beta
),
int8_x4_format
,
compute_fp32
);
gemm
(
ctx
,
output_shape
,
args
,
int32_t
(
alpha
),
int32_t
(
beta
),
compute_fp32
);
}
return
args
.
back
();
}
...
...
src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp
View file @
22bb777f
...
...
@@ -37,14 +37,12 @@ void gemm(context& ctx,
const
std
::
vector
<
argument
>&
args
,
float
alpha
,
float
beta
,
bool
int8_x4_format
,
bool
compute_fp32
);
void
gemm
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
,
int32_t
alpha
,
int32_t
beta
,
bool
int8_x4_format
,
bool
compute_fp32
);
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_int8_conv_pack
{
std
::
string
name
()
const
{
return
"gpu::int8_conv_pack"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_int8_gemm_pack_a
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_a"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
hip_int8_gemm_pack_b
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_b"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
22bb777f
...
...
@@ -127,7 +127,7 @@ inline void set_tensor_descriptor(miopenTensorArgumentId_t name,
}
#endif
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
os
,
bool
pack
=
false
)
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
os
)
{
auto
s
=
os
.
normalize_standard
();
auto
t
=
make_obj
<
tensor_descriptor
>
(
&
miopenCreateTensorDescriptor
);
...
...
@@ -142,23 +142,9 @@ inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = fals
else
if
(
s
.
type
()
==
shape
::
int32_type
)
d
=
miopenInt32
;
else
if
(
s
.
type
()
==
shape
::
int8_type
)
{
if
(
pack
)
{
// update the lens and corresponding strides
d
=
miopenInt8x4
;
lens
[
1
]
=
((
lens
[
1
]
+
3
)
/
4
)
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
}
else
{
d
=
miopenInt8
;
}
}
else
{
MIGRAPHX_THROW
(
"MAKE_TENSOR: unsupported type"
);
}
miopenSetTensorDescriptor
(
t
.
get
(),
d
,
s
.
lens
().
size
(),
lens
.
data
(),
strides
.
data
());
return
t
;
...
...
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
MIGRAPHX_GPU_EXPORT
pack_int8_args
{
std
::
string
name
()
const
{
return
"gpu::pack_int8_args"
;
}
void
apply
(
module
&
m
)
const
;
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
View file @
22bb777f
...
...
@@ -40,8 +40,6 @@ struct context;
MIGRAPHX_GPU_EXPORT
bool
get_compute_fp32_flag
();
MIGRAPHX_GPU_EXPORT
bool
get_int8_x4_format
(
context
&
ctx
);
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
...
...
src/targets/gpu/int8_conv_pack.cpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
pack_int8_shape
(
const
shape
&
s
)
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
MIGRAPHX_THROW
(
"PACK_INT8_ARGS: only process int8_type"
);
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
shape
miopen_int8_conv_pack
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
standard
();
return
pack_int8_shape
(
inputs
.
at
(
0
));
}
argument
miopen_int8_conv_pack
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
arg_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
arg_desc_vec4
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
float
alpha
=
1
;
float
beta
=
0
;
// pack input to vec4 format
auto
status
=
miopenTransformTensor
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
arg_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
arg_desc_vec4
.
get
(),
args
[
1
].
implicit
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"INT8_CONV_PACK: transform input tensor failed"
);
}
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/int8_gemm_pack.cpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_int8_gemm_pack_a
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_a
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_a
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
shape
hip_int8_gemm_pack_b
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_b
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_b
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/lowering.cpp
View file @
22bb777f
...
...
@@ -62,7 +62,6 @@ struct miopen_apply
std
::
unordered_map
<
std
::
string
,
std
::
function
<
instruction_ref
(
instruction_ref
)
>>
apply_map
{};
instruction_ref
last
{};
bool
offload_copy
=
false
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
context
&
get_context
()
const
...
...
@@ -84,8 +83,6 @@ struct miopen_apply
assert
(
mod
!=
nullptr
);
assert
(
pass
!=
nullptr
);
auto
&
ctx
=
get_context
();
int8_x4_format
=
get_int8_x4_format
(
ctx
);
compute_fp32
=
get_compute_fp32_flag
();
offload_copy
=
(
mod
==
mpm
->
get_root_module
())
?
pass
->
offload_copy
:
false
;
...
...
@@ -231,17 +228,14 @@ struct miopen_apply
assert
(
refs
.
size
()
==
2
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
refs
.
push_back
(
output
);
return
mod
->
replace_instruction
(
ins
,
rocblas_gemm
<
Op
>
{
Op
{},
1
,
0
,
int8_x4_format
,
compute_fp32
},
refs
);
return
mod
->
replace_instruction
(
ins
,
rocblas_gemm
<
Op
>
{
Op
{},
1
,
0
,
compute_fp32
},
refs
);
});
}
void
add_convolution_op
(
const
std
::
string
&
name
)
{
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
operation
conv
=
make_op
(
"gpu::"
+
name
,
{{
"op"
,
ins
->
get_operator
().
to_value
()},
{
"int8_x4_format"
,
int8_x4_format
}});
operation
conv
=
make_op
(
"gpu::"
+
name
,
{{
"op"
,
ins
->
get_operator
().
to_value
()}});
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
...
...
src/targets/gpu/pack_int8_args.cpp
deleted
100644 → 0
View file @
409fd18c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iterator>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/int8_gemm_pack.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/program.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/permutation.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
static
instruction_ref
pad_ins
(
module
&
m
,
instruction_ref
ins
,
int
offset
)
{
auto
s
=
ins
->
get_shape
();
auto
lens
=
s
.
lens
();
auto
k
=
lens
[
lens
.
size
()
+
offset
];
auto
pad_k
=
(
k
+
3
)
/
4
*
4
;
auto
pad_lens
=
lens
;
pad_lens
[
lens
.
size
()
+
offset
]
=
pad_k
;
auto
ret_ins
=
ins
;
if
(
pad_k
!=
k
)
{
std
::
vector
<
int64_t
>
pad_dims
(
lens
.
size
()
*
2
,
0
);
pad_dims
[
lens
.
size
()
+
offset
]
=
pad_k
-
k
;
shape
ps
{
s
.
type
(),
pad_lens
};
auto
ins_out
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
ps
)}}));
auto
pad
=
make_op
(
"pad"
,
{{
"pads"
,
pad_dims
}});
ret_ins
=
m
.
insert_instruction
(
std
::
next
(
ins
),
make_op
(
"gpu::pad"
,
pad
.
to_value
()),
ins
,
ins_out
);
}
return
ret_ins
;
}
static
std
::
vector
<
instruction_ref
>
pad_inputs
(
module
&
m
,
instruction_ref
ins
)
{
std
::
vector
<
instruction_ref
>
ret_inputs
;
auto
inputs
=
ins
->
inputs
();
auto
in0
=
inputs
.
at
(
0
);
auto
sa
=
in0
->
get_shape
();
bool
transa
=
sa
.
transposed
();
if
(
transa
)
{
auto
perm
=
find_permutation
(
sa
);
auto
val
=
in0
->
get_operator
().
to_value
();
if
(
val
.
contains
(
"dims"
))
{
int
offset
=
static_cast
<
int
>
(
perm
.
back
())
-
static_cast
<
int
>
(
perm
.
size
());
auto
t_in
=
in0
->
inputs
().
front
();
auto
p_in
=
pad_ins
(
m
,
t_in
,
offset
);
auto
dims
=
val
.
at
(
"dims"
).
to_vector
<
int64_t
>
();
auto
r_in
=
m
.
insert_instruction
(
ins
,
make_op
(
"transpose"
,
{{
"permutation"
,
dims
}}),
p_in
);
ret_inputs
.
push_back
(
r_in
);
}
else
{
shape
cs
{
in0
->
get_shape
().
type
(),
in0
->
get_shape
().
lens
()};
auto
con_out
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
cs
)}}));
auto
cin0
=
m
.
insert_instruction
(
ins
,
make_op
(
"gpu::contiguous"
),
in0
,
con_out
);
ret_inputs
.
push_back
(
pad_ins
(
m
,
cin0
,
-
1
));
}
}
else
{
ret_inputs
.
push_back
(
pad_ins
(
m
,
in0
,
-
1
));
}
auto
in1
=
inputs
.
at
(
1
);
auto
sb
=
in1
->
get_shape
();
bool
transb
=
sb
.
transposed
();
if
(
transb
)
{
auto
perm
=
find_permutation
(
sb
);
auto
val
=
in1
->
get_operator
().
to_value
();
if
(
val
.
contains
(
"dims"
))
{
int
offset
=
static_cast
<
int
>
(
perm
[
perm
.
size
()
-
2
])
-
static_cast
<
int
>
(
perm
.
size
());
auto
t_in
=
in1
->
inputs
().
front
();
auto
p_in
=
pad_ins
(
m
,
t_in
,
offset
);
auto
dims
=
val
.
at
(
"dims"
).
to_vector
<
int64_t
>
();
auto
r_in
=
m
.
insert_instruction
(
ins
,
make_op
(
"transpose"
,
{{
"permutation"
,
dims
}}),
p_in
);
ret_inputs
.
push_back
(
r_in
);
}
else
{
shape
cs
{
in1
->
get_shape
().
type
(),
in1
->
get_shape
().
lens
()};
auto
con_out
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
cs
)}}));
auto
cin1
=
m
.
insert_instruction
(
ins
,
make_op
(
"gpu::contiguous"
),
in1
,
con_out
);
ret_inputs
.
push_back
(
pad_ins
(
m
,
cin1
,
-
2
));
}
}
else
{
ret_inputs
.
push_back
(
pad_ins
(
m
,
in1
,
-
2
));
}
std
::
copy
(
inputs
.
begin
()
+
2
,
inputs
.
end
(),
std
::
back_inserter
(
ret_inputs
));
return
ret_inputs
;
}
void
pack_int8_args
::
apply
(
module
&
m
)
const
{
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
==
"gpu::quant_gemm"
)
{
auto
val
=
ins
->
get_operator
().
to_value
();
assert
(
val
.
contains
(
"int8_x4_format"
));
if
(
not
val
.
at
(
"int8_x4_format"
).
to
<
bool
>
())
{
continue
;
}
auto
inputs
=
ins
->
inputs
();
auto
lens
=
inputs
.
at
(
0
)
->
get_shape
().
lens
();
// gemm need the k to be multiple of 4, so need packing that dimension
auto
old_inputs
=
inputs
;
if
((
lens
.
back
()
%
4
)
!=
0
)
{
inputs
=
pad_inputs
(
m
,
ins
);
}
bool
transa
=
inputs
[
0
]
->
get_shape
().
transposed
();
bool
transb
=
inputs
[
1
]
->
get_shape
().
transposed
();
if
(
not
transb
)
{
auto
packed_b
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
inputs
[
1
]
->
get_shape
())}}));
auto
output_b
=
m
.
insert_instruction
(
ins
,
make_op
(
"gpu::int8_gemm_pack_a"
),
{
inputs
[
1
],
packed_b
});
inputs
[
1
]
=
output_b
;
}
if
(
transa
)
{
auto
packed_a
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
inputs
[
0
]
->
get_shape
())}}));
auto
output_a
=
m
.
insert_instruction
(
ins
,
make_op
(
"gpu::int8_gemm_pack_b"
),
{
inputs
[
0
],
packed_a
});
inputs
[
0
]
=
output_a
;
}
if
(
inputs
!=
old_inputs
)
{
m
.
replace_instruction
(
ins
,
ins
->
get_operator
(),
inputs
);
}
}
else
if
(
ins
->
name
()
==
"gpu::quant_convolution"
)
{
auto
val
=
ins
->
get_operator
().
to_value
();
if
(
not
val
.
at
(
"int8_x4_format"
).
to
<
bool
>
())
{
continue
;
}
auto
inputs
=
ins
->
inputs
();
auto
packed_x
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
pack_int8_shape
(
inputs
[
0
]
->
get_shape
()))}}));
auto
output_x
=
m
.
insert_instruction
(
ins
,
make_op
(
"gpu::int8_conv_pack"
),
{
inputs
[
0
],
packed_x
});
instruction
::
replace_argument
(
ins
,
inputs
[
0
],
output_x
);
auto
packed_w
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
pack_int8_shape
(
inputs
[
1
]
->
get_shape
()))}}));
auto
output_w
=
m
.
insert_instruction
(
ins
,
make_op
(
"gpu::int8_conv_pack"
),
{
inputs
[
1
],
packed_w
});
instruction
::
replace_argument
(
ins
,
inputs
[
1
],
output_w
);
}
}
}
shape
pack_int8_args
::
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
MIGRAPHX_THROW
(
"PACK_INT8_ARGS: only process int8_type"
);
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment