Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
7702c20d
Commit
7702c20d
authored
Aug 19, 2022
by
Paul
Browse files
Merge
parents
c362e7fa
9afce86d
Changes
248
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
516 additions
and
70 deletions
+516
-70
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+139
-0
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+2
-9
src/targets/gpu/hip.cpp
src/targets/gpu/hip.cpp
+54
-17
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
+12
-7
src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
+4
-4
src/targets/gpu/include/migraphx/gpu/device_name.hpp
src/targets/gpu/include/migraphx/gpu/device_name.hpp
+2
-0
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp
+10
-9
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+1
-0
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+1
-0
src/targets/gpu/include/migraphx/gpu/mlir.hpp
src/targets/gpu/include/migraphx/gpu/mlir.hpp
+50
-0
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+49
-0
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+2
-2
src/targets/gpu/jit/layernorm.cpp
src/targets/gpu/jit/layernorm.cpp
+24
-1
src/targets/gpu/jit/mlir.cpp
src/targets/gpu/jit/mlir.cpp
+58
-0
src/targets/gpu/jit/softmax.cpp
src/targets/gpu/jit/softmax.cpp
+25
-2
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
+36
-13
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
...argets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
+24
-3
src/targets/gpu/kernels/include/migraphx/kernels/ops.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ops.hpp
+2
-2
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
+2
-1
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+19
-0
No files found.
src/targets/gpu/fuse_mlir.cpp
0 → 100644
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
namespace
gpu
{
#ifdef MIGRAPHX_MLIR
// Operation that stands for a convolution fused with the contents of one
// submodule. The wrapped convolution operator is the only reflected field.
struct mlir_conv
{
    operation op = make_op("convolution");

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.op, "op"));
    }

    std::string name() const { return "gpu::mlir_conv"; }

    // The output shape is the wrapped convolution's shape computed from the
    // last two inputs; any inputs before them do not influence the result.
    // Throws unless exactly one submodule and at least two inputs are given.
    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
    {
        check_shapes{inputs, *this}.standard();
        if(mods.size() != 1)
        {
            MIGRAPHX_THROW("should have one submodule.");
        }
        if(inputs.size() < 2)
        {
            MIGRAPHX_THROW("should have at least two inputs.");
        }
        const auto count        = inputs.size();
        const shape& conv_lhs   = inputs[count - 2];
        const shape& conv_rhs   = inputs[count - 1];
        return op.compute_shape({conv_lhs, conv_rhs});
    }
};
MIGRAPHX_REGISTER_OP(mlir_conv);
namespace
{
// Matcher/rewriter pair that replaces a pointwise instruction fed by a
// convolution with a single gpu::mlir_conv instruction. The new instruction
// carries a freshly created submodule holding the convolution followed by a
// copy of the pointwise submodule's instructions.
struct find_conv_pointwise
{
    // Find a convolution followed by a pointwise operation.
    // "convolution" is bound on the convolution matcher itself, "x" on the
    // matcher applied to the pointwise input (contiguous instructions between
    // the two are skipped).
    auto matcher() const
    {
        auto convolution =
            match::skip(match::name("contiguous"))(match::name("convolution").bind("convolution"));
        return match::name("pointwise")(match::any_of[match::inputs()](convolution.bind("x")));
    }

    // Builds the fused submodule and replaces the matched pointwise instruction.
    // Returns without changing anything when the pointwise submodule contains
    // an operator outside the whitelist or when any pointwise input is not fp32.
    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto ins      = r.result;
        auto conv_ins = r.instructions["convolution"];
        auto x_ins    = r.instructions["x"]; // input after contiguous
        auto* pm      = ins->module_inputs().front();
        auto names    = pm->get_parameter_names();
        // Whitelist pointwise operators
        if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
               return not contains(
                   {"@literal", "@param", "@return", "convolution", "add", "relu"}, i.name());
           }))
            return;
        // Only fuse with fp32 for now
        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
               return i->get_shape().type() != shape::type_t::float_type;
           }))
            return;
        // Sorted names are paired positionally with the pointwise inputs below.
        std::sort(names.begin(), names.end());
        module_ref mm = mpm.create_module("mlir_" + pm->name());
        mm->set_bypass();
        std::unordered_map<instruction_ref, instruction_ref> param_map;
        // The two convolution operands get parameter names numbered after the
        // existing pointwise parameters.
        auto x = mm->add_parameter("x" + std::to_string(names.size()),
                                   conv_ins->inputs().at(0)->get_shape());
        auto w = mm->add_parameter("x" + std::to_string(names.size() + 1),
                                   conv_ins->inputs().at(1)->get_shape());
        auto conv = mm->add_instruction(conv_ins->get_operator(), {x, w});
        // Map each pointwise parameter either to the in-module convolution
        // (for the input that came from the convolution) or to a new
        // parameter of the fused module with the same name and shape.
        std::transform(names.begin(),
                       names.end(),
                       ins->inputs().begin(),
                       std::inserter(param_map, param_map.end()),
                       [&](auto name, auto input) {
                           if(input == x_ins)
                               return std::make_pair(pm->get_parameter(name), conv);
                           return std::make_pair(pm->get_parameter(name),
                                                 mm->add_parameter(name, input->get_shape()));
                       });
        mm->add_return(mm->insert_instructions(mm->end(), pm, param_map));

        std::vector<instruction_ref> inputs;
        // Drop the input that was replaced by the in-module convolution. The
        // test must be against x_ins (the direct pointwise input), exactly as
        // in the param_map construction above: when a contiguous sits between
        // the convolution and the pointwise op, x_ins differs from conv_ins and
        // filtering on conv_ins would leave one argument too many.
        std::copy_if(ins->inputs().begin(),
                     ins->inputs().end(),
                     std::back_inserter(inputs),
                     [&](auto input) { return input != x_ins; });
        // The convolution operands go last, matching mlir_conv::compute_shape,
        // which reads the final two inputs.
        inputs.insert(inputs.end(), conv_ins->inputs().begin(), conv_ins->inputs().end());
        mpm.get_module().replace_instruction(
            ins, mlir_conv{conv_ins->get_operator()}, inputs, {mm});
    }
};
}
// namespace
#endif
// Runs the convolution + pointwise fusion over the module when the build has
// MIGRAPHX_MLIR defined; otherwise the pass does nothing.
void fuse_mlir::apply(module_pass_manager& mpm) const
{
#ifndef MIGRAPHX_MLIR
    // Nothing to do without MLIR; reference the argument to keep it "used".
    static_cast<void>(mpm);
#else
    match::find_matches(mpm, find_conv_pointwise{});
#endif
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/fuse_ops.cpp
View file @
7702c20d
...
...
@@ -336,6 +336,7 @@ void move_standard_front(std::vector<instruction_ref>& args)
auto
gpu_name
(
const
std
::
string
&
s
)
{
return
match
::
name
(
"gpu::"
+
s
);
}
namespace
{
struct
find_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
(
&
gpu_name
);
}
...
...
@@ -837,15 +838,6 @@ inline auto precompile_name(Strings... names) // NOLINT
});
}
template
<
class
...
Ms
>
auto
conv_bias_pointwise
(
Ms
...
ms
)
{
return
precompile_name
(
"pointwise"
)(
match
::
either_arg
(
0
,
1
)(
bias_shape
(
match
::
used_once
()).
bind
(
"bias"
),
fusable_conv
(
match
::
used_once
()).
bind
(
"conv"
)),
ms
...);
}
struct
find_conv_bias
{
context
*
ctx
=
nullptr
;
...
...
@@ -1014,6 +1006,7 @@ struct find_commutative_broadcast
m
.
replace_instruction
(
ins
,
ins
->
get_operator
(),
args
);
}
};
}
// namespace
struct
find_contiguous
{
...
...
src/targets/gpu/hip.cpp
View file @
7702c20d
...
...
@@ -23,13 +23,13 @@
*/
#include <migraphx/gpu/hip.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <miopen/miopen.h>
#include <memory>
#include <mutex>
#include <vector>
namespace
migraphx
{
...
...
@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr)
return
result
;
}
hip_ptr
allocate_gpu
(
std
::
size_t
sz
,
bool
host
=
false
)
// Maps a raw pointer to a weak reference of the shared_ptr that owns it, so
// that a later lookup for the same address can reuse the existing owner
// instead of creating a second one. Both operations take the mutex.
struct host_ptr_cache
{
    std::unordered_map<void*, std::weak_ptr<void>> cache;
    std::mutex m;

    // Returns the live owner registered for `ptr`, or nullptr when the address
    // is unknown or its owner has already been destroyed. An entry found to be
    // expired is erased so the map does not accumulate dead entries forever.
    std::shared_ptr<void> get(void* ptr)
    {
        std::lock_guard<std::mutex> lock(m);
        auto it = cache.find(ptr);
        if(it == cache.end())
            return nullptr;
        auto owner = it->second.lock();
        if(not owner)
            cache.erase(it);
        return owner;
    }

    // Records (or overwrites) the entry keyed by the address `p` manages.
    // Only a weak reference is stored, so the cache never extends lifetime.
    void put(const std::shared_ptr<void>& p)
    {
        std::lock_guard<std::mutex> lock(m);
        cache[p.get()] = p;
    }
};
// Returns a reference to the one host_ptr_cache instance used by this
// translation unit. The instance is a function-local static, so every call
// yields the same object and it is constructed on first use.
static
host_ptr_cache
&
get_host_ptr_cache
()
{
static
host_ptr_cache
cache
;
return
cache
;
}
std
::
shared_ptr
<
void
>
allocate_gpu
(
std
::
size_t
sz
,
bool
host
=
false
)
{
if
(
sz
>
get_available_gpu_memory
())
MIGRAPHX_THROW
(
"Memory not available to allocate buffer: "
+
std
::
to_string
(
sz
));
void
*
result
=
nullptr
;
auto
status
=
host
?
hipHostMalloc
(
&
result
,
sz
)
:
hipMalloc
(
&
result
,
sz
);
void
*
alloc_ptr
=
nullptr
;
auto
status
=
host
?
hipHostMalloc
(
&
alloc_ptr
,
sz
)
:
hipMalloc
(
&
alloc_ptr
,
sz
);
if
(
status
!=
hipSuccess
)
{
if
(
host
)
...
...
@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
else
return
allocate_gpu
(
sz
,
true
);
}
assert
(
result
!=
nullptr
);
return
hip_ptr
{
result
};
assert
(
alloc_ptr
!=
nullptr
);
std
::
shared_ptr
<
void
>
result
=
share
(
hip_ptr
{
alloc_ptr
});
if
(
host
)
{
get_host_ptr_cache
().
put
(
result
);
}
return
result
;
}
hip_host_ptr
register_on_gpu
(
void
*
ptr
,
std
::
size_t
sz
)
std
::
shared_ptr
<
void
>
register_on_gpu
(
void
*
ptr
,
std
::
size_t
sz
)
{
std
::
shared_ptr
<
void
>
result
=
get_host_ptr_cache
().
get
(
ptr
);
if
(
result
)
{
return
result
;
}
auto
status
=
hipHostRegister
(
ptr
,
sz
,
hipHostRegisterMapped
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Gpu register failed: "
+
hip_error
(
status
));
return
hip_host_ptr
{
ptr
};
result
=
share
(
hip_host_ptr
{
ptr
});
get_host_ptr_cache
().
put
(
result
);
return
result
;
}
template
<
class
T
>
...
...
@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return
result
;
}
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
std
::
shared_ptr
<
void
>
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
{
gpu_sync
();
auto
result
=
allocate_gpu
(
sz
,
host
);
...
...
@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x)
argument
allocate_gpu
(
const
shape
&
s
,
bool
host
)
{
auto
p
=
share
(
allocate_gpu
(
s
.
bytes
()
+
1
,
host
)
)
;
auto
p
=
allocate_gpu
(
s
.
bytes
()
+
1
,
host
);
return
{
s
,
[
p
]()
mutable
{
return
reinterpret_cast
<
char
*>
(
p
.
get
());
}};
}
argument
register_on_gpu
(
const
argument
&
arg
)
{
auto
arg_shared
=
arg
.
share
();
auto
p
=
share
(
register_on_gpu
(
arg_shared
.
data
(),
arg_shared
.
get_shape
().
bytes
()));
return
{
arg_shared
.
get_shape
(),
[
p
,
a
=
std
::
move
(
arg_shared
)]()
mutable
{
return
get_device_ptr
(
p
.
get
());
}};
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
auto
p
=
register_on_gpu
(
arg_shared
.
data
(),
arg_shared
.
get_shape
().
bytes
());
return
{
arg_shared
.
get_shape
(),
[
p
,
a
=
std
::
move
(
arg_shared
)]()
mutable
{
return
get_device_ptr
(
p
.
get
());
}};
}
argument
to_gpu
(
const
argument
&
arg
,
bool
host
)
{
auto
p
=
share
(
write_to_gpu
(
arg
.
data
(),
arg
.
get_shape
().
bytes
(),
host
)
)
;
auto
p
=
write_to_gpu
(
arg
.
data
(),
arg
.
get_shape
().
bytes
(),
host
);
return
{
arg
.
get_shape
(),
p
};
}
...
...
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
View file @
7702c20d
...
...
@@ -38,12 +38,13 @@ struct context;
struct
code_object_op
{
value
::
binary
code_object
;
std
::
string
symbol_name
;
std
::
size_t
global
;
std
::
size_t
local
;
std
::
vector
<
shape
>
expected_inputs
;
shape
output
;
value
::
binary
code_object
{};
std
::
string
symbol_name
=
""
;
std
::
size_t
global
=
0
;
std
::
size_t
local
=
0
;
std
::
vector
<
shape
>
expected_inputs
{};
shape
output
{};
std
::
int64_t
output_arg
=
-
1
;
kernel
k
{};
template
<
class
Self
,
class
F
>
...
...
@@ -66,9 +67,13 @@ struct code_object_op
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
);
std
::
int64_t
get_output_arg
(
std
::
size_t
n
)
const
{
return
output_arg
<
0
?
n
+
output_arg
:
output_arg
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
return
get_output_arg
(
shapes
.
size
()
)
;
}
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
code_object_op
&
op
)
...
...
src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
View file @
7702c20d
...
...
@@ -39,20 +39,20 @@ struct miopen_deconvolution
op
::
deconvolution
op
;
shared
<
convolution_descriptor
>
cd
;
miopenConvFwdAlgorithm_t
algo
{};
miopenHandle_t
handle
=
nullptr
;
uint64_t
solution_id
=
0
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
// TODO: Add algo
return
op
::
convolution
::
reflect
(
self
.
op
,
f
);
return
pack_join
(
op
::
deconvolution
::
reflect
(
self
.
op
,
f
),
pack
(
f
(
self
.
solution_id
,
"solution_id"
))
);
}
std
::
string
name
()
const
{
return
"gpu::deconv"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
...
...
src/targets/gpu/include/migraphx/gpu/device_name.hpp
View file @
7702c20d
...
...
@@ -33,6 +33,8 @@ namespace gpu {
std
::
string
get_device_name
();
int
get_device_id
();
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/
mlir_conv
.hpp
→
src/targets/gpu/include/migraphx/gpu/
fuse_mlir
.hpp
View file @
7702c20d
...
...
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_MIOPEN_MLIR_CONV
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_MIOPEN_MLIR_CONV
_HPP
#ifndef MIGRAPHX_GUARD_
GPU_FUSE_MLIR
_HPP
#define MIGRAPHX_GUARD_
GPU_FUSE_MLIR
_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
...
...
@@ -30,18 +30,19 @@
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
struct
module
_pass_manager
;
namespace
gpu
{
struct
mlir_conv
struct
fuse_mlir
{
context
*
ctx
;
std
::
string
name
()
const
{
return
"
mlir::convolution
"
;
}
void
apply
(
module
&
m
)
const
;
context
*
ctx
=
nullptr
;
std
::
string
name
()
const
{
return
"
gpu::fuse_mlir
"
;
}
void
apply
(
module
_pass_manager
&
mp
m
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
#endif // MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
View file @
7702c20d
...
...
@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
...
...
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
View file @
7702c20d
...
...
@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
...
...
src/targets/gpu/include/migraphx/gpu/mlir.hpp
0 → 100644
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_MLIR_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_MLIR_HPP
#include <string>
#include <vector>
#include <migraphx/config.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/instruction_ref.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
namespace
gpu
{
// Produces a string from module `m`.
// NOTE(review): presumably the textual MLIR generated for the module; confirm
// against the implementation, which is not visible from this header.
std
::
string
dump_mlir
(
const
module
&
m
);
// Compiles module `m` for the given context and returns the result as a
// code_object_op.
// NOTE(review): callers pass the fused submodule of a gpu::mlir_conv
// instruction; whether other modules are accepted is not visible here.
code_object_op
compile_mlir
(
const
context
&
ctx
,
const
module
&
m
);
// Inserts the code object `co` into module `m` relative to instruction `ins`,
// using `inputs` as its arguments, and returns the inserted instruction.
// NOTE(review): presumably inserts before `ins` and may add an output
// allocation; confirm against the implementation.
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
ins
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
inputs
);
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
0 → 100644
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_PERFDB_HPP
#define MIGRAPHX_GUARD_GPU_PERFDB_HPP
#include <migraphx/config.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/operation.hpp>
#include <string>
#include <vector>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Describes one problem instance: an operation together with the shapes of
// its inputs and the shape of its output. Passed to get_mlir_perf_for_conv.
struct
problem_params
{
// The operation being described.
operation
op
;
// Shapes of the operation's inputs.
std
::
vector
<
shape
>
inputs
;
// Shape of the operation's output.
shape
output
;
};
// Returns a string for the convolution problem described by `pp`.
// NOTE(review): presumably a tuning/performance configuration looked up in a
// performance database; the format and the not-found result are not visible
// from this header. Confirm against the implementation.
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
);
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PERFDB_HPP
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
View file @
7702c20d
...
...
@@ -41,7 +41,7 @@ struct miopen_quant_convolution
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
;
miopenConvFwdAlgorithm_t
algo
{};
miopenHandle_t
handle
=
nullptr
;
uint64_t
solution_id
=
0
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
...
...
@@ -55,7 +55,7 @@ struct miopen_quant_convolution
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
...
...
src/targets/gpu/jit/layernorm.cpp
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
...
...
@@ -61,7 +84,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
}
auto
preloads
=
preload
::
broadcasts
(
axis
,
inputs
);
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
nelements
=
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
];
auto
nelements
=
(
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
]
)
;
auto
block_size
=
compute_block_size
(
relements
,
256
);
hip_compile_options
options
;
options
.
set_launch_params
(
...
...
src/targets/gpu/jit/mlir.cpp
0 → 100644
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/mlir.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// JIT compiler entry for gpu::mlir_conv instructions: compiles the
// instruction's submodule with compile_mlir and swaps the instruction for the
// resulting code object.
struct mlir_compiler : compiler<mlir_compiler>
{
    // Operator names this compiler is registered for.
    std::vector<std::string> names() const { return {"gpu::mlir_conv"}; }

    // Unused for this compiler; always yields a default-constructed operation.
    operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }

    // Compiles the first submodule attached to `ins` and returns the
    // replacement action for it.
    compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
    {
        auto* submodule = ins->module_inputs().front();
        // The instruction is expected to carry exactly one input more than
        // the submodule has parameters.
        assert(submodule->get_parameter_names().size() == ins->inputs().size() - 1);
        return insert(compile_mlir(ctx, *submodule));
    }

    // Wraps a compiled code object in a callback that inserts it via
    // insert_mlir and replaces the original instruction with the result.
    compiler_replace insert(code_object_op co) const
    {
        return [compiled = std::move(co)](module& m, instruction_ref ins) {
            auto replacement = insert_mlir(m, ins, compiled, ins->inputs());
            m.replace_instruction(ins, replacement);
        };
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/jit/softmax.cpp
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
...
...
@@ -52,12 +75,12 @@ struct softmax_compiler : compiler<softmax_compiler>
auto
faxis
=
find_fast_axis
({
inputs
.
front
()});
vectorize
vec
{};
// Vectorize if the axis is a reduction axis
if
(
inputs
.
back
().
lens
()[
faxis
]
==
1
)
if
(
faxis
==
axis
)
{
vec
=
vectorize
::
elements
(
faxis
,
inputs
);
}
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
nelements
=
inputs
.
back
().
elements
()
/
relements
;
auto
nelements
=
(
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
])
;
auto
block_size
=
compute_block_size
(
relements
,
256
);
hip_compile_options
options
;
options
.
set_launch_params
(
...
...
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
View file @
7702c20d
...
...
@@ -27,6 +27,7 @@
#include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/type_traits.hpp>
namespace
migraphx
{
...
...
@@ -53,29 +54,51 @@ struct index
return
blockDim
.
x
;
// NOLINT
}
#endif
template
<
class
N
,
class
Stride
>
static
constexpr
auto
max_stride_iterations
(
N
n
,
Stride
stride
)
{
return
(
n
-
_c
<
1
>
)
/
stride
+
_c
<
1
>
;
}
template
<
class
F
>
__device__
void
global_stride
(
index_int
n
,
F
f
)
const
template
<
class
F
,
class
N
,
class
Stride
>
static
constexpr
void
for_stride
(
index_int
start
,
N
n
,
Stride
stride
,
F
f
)
{
const
auto
stride
=
nglobal
();
for
(
in
de
x
_i
nt
i
=
global
;
i
<
n
;
i
+=
stride
)
if
const
expr
(
not
is_integral
<
N
>
{}
and
not
is_integral
<
Stride
>
{}
and
max_stri
de_i
terations
(
n
,
stride
)
==
1
)
{
f
(
i
);
if
constexpr
(
stride
>
n
)
{
if
(
start
<
n
)
f
(
start
);
}
else
{
f
(
start
);
}
}
else
{
for
(
index_int
i
=
start
;
i
<
n
;
i
+=
stride
)
{
f
(
i
);
}
}
}
template
<
class
F
>
__device__
void
lo
c
al_stride
(
index_int
n
,
F
f
)
const
template
<
class
F
,
class
N
>
__device__
void
g
lo
b
al_stride
(
N
n
,
F
f
)
const
{
const
auto
stride
=
nlocal
();
for
(
index_int
i
=
local
;
i
<
n
;
i
+=
stride
)
{
f
(
i
);
}
for_stride
(
global
,
n
,
nglobal
(),
f
);
}
template
<
class
F
,
class
N
>
__device__
void
local_stride
(
N
n
,
F
f
)
const
{
for_stride
(
local
,
n
,
nlocal
(),
f
);
}
};
inline
__device__
index
make_index
()
inline
__device__
__attribute__
((
const
))
index
make_index
()
{
return
index
{
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
,
threadIdx
.
x
,
blockIdx
.
x
};
// NOLINT
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
View file @
7702c20d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#include <migraphx/kernels/reduce.hpp>
...
...
@@ -23,11 +46,9 @@ __device__ void generic_binary_layernorm(
F
compute
,
BinOp
op
,
Output
output
,
Input1
input1
,
Input2
input2
,
Inputs
...
inputs
)
{
using
reduce_output
=
reduce
::
with_axis
<
Input1
,
Axis
>
;
constexpr
auto
relements
=
get_shape_c
<
Input1
>
{}.
elements
()
/
get_shape_c
<
reduce_output
>
{}.
elements
();
MIGRAPHX_ASSERT
(
relements
>
0
);
reduce
::
block
::
run
<
reduce_output
>
([
&
](
auto
,
auto
r
)
{
using
value_type
=
typename
Input1
::
type
;
constexpr
auto
relements
=
r
.
template
elements
<
Input1
>();
auto
means
=
r
.
reduce
(
op
::
sum
{},
make_array
<
value_type
>
(
0
,
0
),
[
&
](
auto
x1
,
auto
x2
)
{
auto
x
=
op
(
x1
,
x2
);
return
make_array
(
x
,
x
*
x
)
/
value_type
{
relements
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/ops.hpp
View file @
7702c20d
...
...
@@ -90,7 +90,7 @@ struct lowest
template
<
class
T
>
constexpr
operator
T
()
const
{
return
numeric_lowest
<
T
>
();
return
numeric_lowest
<
vec_type
<
T
>
>
();
}
};
...
...
@@ -99,7 +99,7 @@ struct highest
template
<
class
T
>
constexpr
operator
T
()
const
{
return
numeric_max
<
T
>
();
return
numeric_max
<
vec_type
<
T
>
>
();
}
};
}
// namespace migraphx
...
...
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
View file @
7702c20d
...
...
@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx)
{
return
make_transform
([
=
](
auto
f
,
auto
...
xs
)
{
auto
invoke
=
[
=
](
auto
...
ys
)
{
__syncthreads
();
if
constexpr
((
Bs
or
...))
__syncthreads
();
f
(
ys
...);
};
join
(
invoke
,
preload_copy
<
Bs
>
(
idx
,
xs
)...);
...
...
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
7702c20d
...
...
@@ -224,6 +224,18 @@ struct block
idx
.
local_stride
(
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
f
(
x
[
j
],
xs
[
j
]...);
});
});
}
template
<
class
Input
>
constexpr
auto
elements
()
const
{
using
reduce_type
=
decltype
(
slicer
(
Input
{}));
using
value_type
=
typename
Input
::
type
;
constexpr
auto
relements
=
get_shape_c
<
reduce_type
>
{}.
elements
();
if
constexpr
(
vec_size
<
value_type
>
()
>
1
)
return
relements
*
vec_size
<
value_type
>
();
else
return
relements
;
}
};
template
<
class
Slicer
>
...
...
@@ -281,6 +293,13 @@ struct lane
}
});
}
template
<
class
Input
>
constexpr
auto
elements
()
const
{
using
reduce_type
=
decltype
(
slicer
(
Input
{}));
return
get_shape_c
<
reduce_type
>
{}.
elements
();
}
};
template
<
class
Slicer
>
...
...
Prev
1
…
5
6
7
8
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment