gaoqiong / MIGraphX · Commits

Commit 23cb7917 (unverified)

    Merge branch 'develop' into blas_tuning

Authored by Brian Pickrell on Aug 16, 2023; committed via GitHub on Aug 16, 2023.
Parents: b5fcc0bc, ea32ca70

The full commit touches 458 files; this page shows 20 changed files, with 479 additions and 121 deletions (+479 −121).
Files shown on this page:

- src/targets/gpu/compiler.cpp (+24 −12)
- src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp (+7 −1)
- src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp (+2 −2)
- src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp (+10 −10)
- src/targets/gpu/device/multinomial.cpp (+12 −11)
- src/targets/gpu/device/scatter.cpp (+16 −12)
- src/targets/gpu/device/topk.cpp (+2 −2)
- src/targets/gpu/device_name.cpp (+1 −13)
- src/targets/gpu/driver/CMakeLists.txt (+1 −1)
- src/targets/gpu/driver/compile_op.cpp (+1 −1)
- src/targets/gpu/driver/run_op.cpp (+1 −1)
- src/targets/gpu/fuse_ck.cpp (new file, +175 −0)
- src/targets/gpu/fuse_mlir.cpp (+185 −35)
- src/targets/gpu/fuse_ops.cpp (+2 −1)
- src/targets/gpu/gemm_impl.cpp (+8 −0)
- src/targets/gpu/hip.cpp (+5 −1)
- src/targets/gpu/include/migraphx/gpu/allocation_model.hpp (+2 −2)
- src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp (+2 −2)
- src/targets/gpu/include/migraphx/gpu/compile_hip.hpp (+11 −6)
- src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp (+12 −8)
src/targets/gpu/compiler.cpp

```diff
@@ -28,33 +28,45 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-auto& compiler_map()
+namespace {
+struct compiler_handle
 {
-    static std::unordered_map<std::string, compiler_compile> m; // NOLINT
-    return m;
-}
+    compiler_compile compile;
+    compiler_compile_op compile_op;
+    compiler_tuning_config get_tuning_config;
+};
+} // namespace
 
-auto& compiler_op_map()
+auto& compiler_map()
 {
-    static std::unordered_map<std::string, compiler_compile_op> m; // NOLINT
+    static std::unordered_map<std::string, compiler_handle> m; // NOLINT
     return m;
 }
 
-void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop)
+void register_compiler(const std::string& name,
+                       compiler_compile c,
+                       compiler_compile_op cop,
+                       compiler_tuning_config ctg)
 {
-    compiler_map()[name]    = std::move(c);
-    compiler_op_map()[name] = std::move(cop);
+    compiler_map()[name] = {std::move(c), std::move(cop), std::move(ctg)};
 }
 
 bool has_compiler_for(const std::string& name) { return compiler_map().count(name) > 0; }
 
-compiler_replace compile(context& ctx, instruction_ref ins, const operation& op)
+compiler_replace
+compile(context& ctx, instruction_ref ins, const operation& op, const value& solution)
 {
-    return compiler_map().at(op.name())(ctx, ins, op);
+    return compiler_map().at(op.name()).compile(ctx, ins, op, solution);
 }
 
 operation compile_op(const std::string& name,
                      context& ctx,
                      const std::vector<shape>& inputs,
                      const value& v)
 {
-    return compiler_op_map().at(name)(ctx, inputs, v);
+    return compiler_map().at(name).compile_op(ctx, inputs, v);
 }
+
+optional<tuning_config>
+get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive)
+{
+    return compiler_map().at(op.name()).get_tuning_config(ctx, ins, op, exhaustive);
+}
 } // namespace gpu
```
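The refactor above collapses the per-callback registries into a single map of `compiler_handle` entries, so a backend's compile, compile-op, and tuning-config callbacks always travel together under one name. A standalone sketch of that registry pattern, using simplified callback types and names rather than MIGraphX's own:

```cpp
// Sketch of the one-map-of-handles registry pattern this change moves to.
// The types here are stand-ins, not MIGraphX's real compiler callbacks.
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>

using compile_fn = std::function<std::string(const std::string&)>;
using tuning_fn  = std::function<std::optional<int>(bool)>;

struct handle
{
    compile_fn compile;
    tuning_fn get_tuning_config;
};

std::unordered_map<std::string, handle>& registry()
{
    static std::unordered_map<std::string, handle> m;
    return m;
}

void register_backend(const std::string& name, compile_fn c, tuning_fn t)
{
    // All callbacks for one backend are stored together in a single entry.
    registry()[name] = {std::move(c), std::move(t)};
}

int main()
{
    register_backend(
        "pointwise",
        [](const std::string& op) { return "compiled:" + op; },
        [](bool exhaustive) { return exhaustive ? std::optional<int>{64} : std::nullopt; });
    std::cout << registry().at("pointwise").compile("add") << '\n'; // compiled:add
}
```

Keeping one handle per name makes it impossible for parallel maps to drift out of sync when a backend registers some callbacks but not others.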
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp

```diff
@@ -41,7 +41,7 @@ struct index
     __device__ index_int nglobal() const { return blockDim.x * gridDim.x; } // NOLINT
 
-    __device__ index_int nlocal() const { return blockDim.x; } // NOLINT
+    __device__ index_int nlocal() const { return blockDim.x; } // NOLINT
 
     template <class F>
     __device__ void global_stride(index_int n, F f) const
@@ -81,6 +81,12 @@ inline auto launch(hipStream_t stream, index_int global, index_int local)
         dim3 nthreads(local);
         // cppcheck-suppress UseDeviceLaunch
         hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, stream, f);
+        hipError_t kernel_launch_status = hipGetLastError();
+        if(kernel_launch_status != hipSuccess)
+        {
+            MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " +
+                           std::string(hipGetErrorString(kernel_launch_status)));
+        }
     };
 }
```
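`hipLaunchKernelGGL` returns `void`, so a failed launch (invalid launch bounds, missing device function) is only visible through `hipGetLastError()` immediately afterwards, which the added lines turn into a `MIGRAPHX_THROW`. A minimal self-contained illustration of the same pattern:

```cpp
// Minimal version of the launch-status check added above: query
// hipGetLastError() right after the launch, since the launch itself
// reports nothing.
#include <hip/hip_runtime.h>
#include <cstdio>

__global__ void noop() {}

int main()
{
    hipStream_t stream = nullptr; // default stream
    hipLaunchKernelGGL(noop, dim3(1), dim3(1), 0, stream);
    hipError_t status = hipGetLastError();
    if(status != hipSuccess)
    {
        std::fprintf(stderr, "kernel launch failed: %s\n", hipGetErrorString(status));
        return 1;
    }
    return hipDeviceSynchronize() == hipSuccess ? 0 : 1;
}
```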
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp

```diff
@@ -124,7 +124,7 @@ void nary_broadcast_vec_impl(
             buffer[i] = binput.data()[i];
         }
         __syncthreads();
-        auto* bp = as_pointer(buffer);
+        const auto* bp = as_pointer(buffer);
         // Process the data
         for(size_t i = idx.global; i < nelements; i += nglobal)
         {
@@ -219,7 +219,7 @@ void nary_double_broadcast_vec_impl(
             buffer[i + bdim_vec_len] = binput2.data()[i];
         }
         __syncthreads();
-        auto* bp = as_pointer(buffer);
+        const auto* bp = as_pointer(buffer);
         // Process the data
         for(size_t i = idx.global; i < nelements; i += nglobal)
         {
```
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp

```diff
@@ -94,6 +94,10 @@ template <>
 struct is_hip_type<std::uint8_t> : std::true_type {};
+template <>
+struct is_hip_type<std::int32_t> : std::true_type {};
 
 template <class T, class V, MIGRAPHX_REQUIRES(is_hip_type<typename T::type>{})>
 void hip_visitor_invoke(T as, V&& v)
@@ -120,12 +124,10 @@ void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... xs)
     if(not std::all_of(types.begin(), types.end(), [&](migraphx::shape::type_t t) {
            return t == s.type();
        }))
         MIGRAPHX_THROW("Types must be the same");
-    std::initializer_list<index_int> ranks = {
-        static_cast<index_int>(get_shape(xs).lens().size())...};
-    if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) {
-           return r == s.lens().size();
-       }))
+    std::initializer_list<index_int> ranks = {static_cast<index_int>(get_shape(xs).ndim())...};
+    if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.ndim(); }))
         MIGRAPHX_THROW("Ranks must be the same");
-    visit_tensor_size(s.lens().size(), [&](auto ndim) {
+    visit_tensor_size(s.ndim(), [&](auto ndim) {
         s.visit_type(hip_visitor([&](auto as) { v(f(xs, ndim, as)...); }));
     });
 }
@@ -133,12 +135,10 @@ void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... xs)
 template <class V, class F, class... Ts>
 void hip_visit_views_impl(const shape& s, F f, V&& v, Ts&&... xs)
 {
-    std::initializer_list<index_int> ranks = {
-        static_cast<index_int>(get_shape(xs).lens().size())...};
-    if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) {
-           return r == s.lens().size();
-       }))
+    std::initializer_list<index_int> ranks = {static_cast<index_int>(get_shape(xs).ndim())...};
+    if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.ndim(); }))
         MIGRAPHX_THROW("Ranks must be the same");
-    visit_tensor_size(s.lens().size(), [&](auto ndim) { v(f(xs, ndim)...); });
+    visit_tensor_size(s.ndim(), [&](auto ndim) { v(f(xs, ndim)...); });
 }
 
 template <class F>
```
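Both functions validate ranks by expanding the parameter pack into an `initializer_list` and testing it with `std::all_of`; the diff merely swaps `lens().size()` for the equivalent `ndim()` accessor. A standalone sketch of that pack-expansion idiom, with plain vectors standing in for MIGraphX shapes:

```cpp
// Pack-expansion rank check: expand the pack into an initializer_list of
// ranks, then verify they all match the reference rank.
#include <algorithm>
#include <cstddef>
#include <initializer_list>
#include <iostream>
#include <vector>

template <class... Ts>
bool same_rank(const std::vector<int>& ref_dims, const Ts&... xs)
{
    std::initializer_list<std::size_t> ranks = {xs.size()...};
    return std::all_of(ranks.begin(), ranks.end(),
                       [&](std::size_t r) { return r == ref_dims.size(); });
}

int main()
{
    std::vector<int> a = {2, 3}, b = {4, 5}, c = {1, 2, 3};
    std::cout << same_rank(a, a, b) << '\n'; // 1
    std::cout << same_rank(a, a, c) << '\n'; // 0
}
```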
src/targets/gpu/device/multinomial.cpp

```diff
@@ -67,18 +67,19 @@ void multinomial(hipStream_t stream,
     size_t class_size  = arg0.get_shape().lens().back();
     size_t sample_size = result.get_shape().lens().back();
 
-    hip_visit_all(arg0, arg1)([&](auto cdf, auto dist) {
-        result.visit([&](auto out) {
-            hip_visit_views(out)([&](auto output) {
-                gs_launch(stream, batch_size * sample_size)([=](auto i) __device__ {
-                    auto idx       = output.get_shape().multi(i);
-                    auto cdf_begin = cdf.begin() + (idx.front() * class_size);
-                    auto cdf_end   = cdf_begin + class_size;
-                    auto sample_iter =
-                        upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
-                    output[i] = std::distance(cdf_begin, sample_iter);
+    visit_all(arg0, arg1)([&](auto cdf_host, auto dist_host) {
+        result.visit([&](auto output_host) {
+            hip_visit_views(cdf_host, dist_host, output_host)(
+                [&](auto cdf, auto dist, auto output) {
+                    gs_launch(stream, batch_size * sample_size)([=](auto i) __device__ {
+                        auto idx       = output.get_shape().multi(i);
+                        auto cdf_begin = cdf.begin() + (idx.front() * class_size);
+                        auto cdf_end   = cdf_begin + class_size;
+                        auto* sample_iter =
+                            upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
+                        output[i] = std::distance(cdf_begin, sample_iter);
+                    });
                 });
-            });
         });
     });
 }
```
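The kernel body itself is unchanged by this restructuring: each output element scales a uniform draw by the row's last (largest) CDF entry and locates it with `upper_bound`, i.e. inverse-CDF sampling over an unnormalized CDF. A host-side sketch of that sampling rule, with made-up numbers:

```cpp
// Inverse-CDF sampling as done inside the multinomial kernel: pick the class
// whose CDF bucket the scaled uniform draw falls into.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

int main()
{
    std::vector<float> cdf = {0.1f, 0.4f, 0.9f, 1.0f}; // running sum of class weights
    float u = 0.5f;                                    // uniform draw in [0, 1)
    // Scale by the last CDF entry, as the kernel does, so the CDF need not be normalized.
    auto it = std::upper_bound(cdf.begin(), cdf.end(), u * cdf.back());
    std::cout << "sampled class " << std::distance(cdf.begin(), it) << '\n'; // prints 2
}
```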
src/targets/gpu/device/scatter.cpp

```diff
@@ -37,22 +37,26 @@ argument scatter(
     hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis)
 {
     auto ds            = arg0.get_shape();
-    auto inds          = arg1.get_shape();
+    auto s1            = arg1.get_shape();
     auto axis_dim_size = ds.lens()[axis];
-    hip_visit_all(result, arg0, inds)([&](auto output, auto data, auto s1) {
+    hip_visit_all(result, arg0, arg2)([&](auto output, auto data, auto update) {
         auto* output_ptr     = device_cast(output.data());
         const auto* data_ptr = device_cast(data.data());
         gs_launch(stream, ds.elements())([=](auto i) __device__ { output_ptr[i] = data_ptr[i]; });
-        hip_visit_all(arg1, arg2)([&](auto indices, auto update) {
-            const auto* upd_ptr     = device_cast(update.data());
-            const auto* indices_ptr = device_cast(indices.data());
-            gs_launch(stream, inds.elements())([=](auto i) __device__ {
-                auto out_idx  = s1.multi(i);
-                auto index    = indices_ptr[i];
-                index         = index < 0 ? index + axis_dim_size : index;
-                out_idx[axis] = index;
-                output[out_idx] = upd_ptr[i];
-            });
+        hip_visit_all(arg1)([&](auto indices) {
+            if constexpr(indices.get_shape().lens.size() == output.get_shape().lens.size())
+            {
+                const auto* upd_ptr     = device_cast(update.data());
+                const auto* indices_ptr = device_cast(indices.data());
+                gs_launch(stream, s1.elements())([=](auto i) __device__ {
+                    auto out_idx  = indices.get_shape().multi(i);
+                    auto index    = indices_ptr[i];
+                    index         = index < 0 ? index + axis_dim_size : index;
+                    out_idx[axis] = index;
+                    output[out_idx] = upd_ptr[i];
+                });
+            }
         });
     });
```
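The update rule the kernel applies is plain scatter semantics: copy the data through, then write each update at its index along `axis`, normalizing negative indices by the axis length. A host-side 1-D sketch with made-up values:

```cpp
// Host-side sketch of the scatter update rule above, restricted to one axis.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> output  = {0, 0, 0, 0, 0};   // starts as a copy of the data
    std::vector<int64_t> idx   = {1, -1};           // -1 normalizes to axis_dim_size - 1
    std::vector<float> updates = {3.5f, 7.0f};
    int64_t axis_dim_size      = static_cast<int64_t>(output.size());
    for(std::size_t i = 0; i < idx.size(); ++i)
    {
        int64_t index = idx[i] < 0 ? idx[i] + axis_dim_size : idx[i];
        output[index] = updates[i];
    }
    for(float v : output)
        std::cout << v << ' '; // 0 3.5 0 0 7
    std::cout << '\n';
}
```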
src/targets/gpu/device/topk.cpp

```diff
@@ -72,12 +72,12 @@ struct hip_heap_vector
         index_int l = 2 * index + 1;
         index_int r = 2 * index + 2;
 
-        if(l < n && compare(data[data_index(l)], data[data_index(index)]))
+        if(l < n and compare(data[data_index(l)], data[data_index(index)]))
         {
             index = l;
         }
 
-        if(r < n && compare(data[data_index(r)], data[data_index(index)]))
+        if(r < n and compare(data[data_index(r)], data[data_index(index)]))
         {
             index = r;
             if(compare(data[data_index(l)], data[data_index(r)]))
```
src/targets/gpu/device_name.cpp

```diff
@@ -31,18 +31,6 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-template <class HipDeviceProp>
-std::string get_arch_name(rank<0>, const HipDeviceProp& props)
-{
-    return "gfx" + std::to_string(props.gcnArch);
-}
-
-template <class HipDeviceProp>
-auto get_arch_name(rank<1>, const HipDeviceProp& props)
-    -> decltype(std::string(props.gcnArchName))
-{
-    return std::string(props.gcnArchName);
-}
-
 int get_device_id()
 {
     int device;
@@ -58,7 +46,7 @@ std::string get_device_name()
     auto status = hipGetDeviceProperties(&props, get_device_id());
     if(status != hipSuccess)
         MIGRAPHX_THROW("Failed to get device properties");
-    return get_arch_name(rank<1>{}, props);
+    return props.gcnArchName;
 }
 
 } // namespace gpu
```
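The deleted `rank<0>`/`rank<1>` overloads existed only to fall back to the deprecated integer `gcnArch` field on old HIP versions; the simplified `get_device_name()` now reads `hipDeviceProp_t::gcnArchName` directly, which current ROCm releases populate (e.g. "gfx90a:sramecc+:xnack-"). A minimal sketch of querying it:

```cpp
// Querying the architecture name string that get_device_name() now returns.
#include <hip/hip_runtime.h>
#include <cstdio>

int main()
{
    int device = 0;
    if(hipGetDevice(&device) != hipSuccess)
        return 1;
    hipDeviceProp_t props{};
    if(hipGetDeviceProperties(&props, device) != hipSuccess)
        return 1;
    std::printf("arch: %s\n", props.gcnArchName);
    return 0;
}
```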
src/targets/gpu/driver/CMakeLists.txt

```diff
@@ -22,7 +22,7 @@
 # THE SOFTWARE.
 #####################################################################################
 
-file(GLOB GPU_DRIVER_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file(GLOB GPU_DRIVER_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
 
 add_executable(gpu-driver ${GPU_DRIVER_SRCS})
```
src/targets/gpu/driver/compile_op.cpp

```diff
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include <migraphx/gpu/driver/action.hpp>
-#include <migraphx/gpu/driver/perf.hpp>
+#include <migraphx/gpu/time_op.hpp>
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/gpu/context.hpp>
```
src/targets/gpu/driver/run_op.cpp

```diff
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include <migraphx/gpu/driver/action.hpp>
-#include <migraphx/gpu/driver/perf.hpp>
+#include <migraphx/gpu/time_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/make_op.hpp>
```
src/targets/gpu/fuse_ck.cpp (new file, mode 100644)

```cpp
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <migraphx/gpu/fuse_ck.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module;

namespace gpu {

struct ck_gemm
{
    operation op = make_op("dot");

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.op, "op"));
    }

    std::string name() const { return "gpu::ck_gemm"; }

    void check_gemm_shape(const shape& s) const
    {
        if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1))
            MIGRAPHX_THROW("Invalid shape for ck_gemm");
    }

    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
    {
        check_shapes{inputs, *this}.same_ndims();
        if(inputs.size() < 2)
            MIGRAPHX_THROW("should have at least two inputs.");
        auto a = inputs[0];
        auto b = inputs[1];
        for(const auto& input : inputs)
            check_gemm_shape(input);
        auto r = op.compute_shape({a, b});
        if(mods.empty())
            return r;
        return r.with_type(mods.front()->get_output_shapes().front().type());
    }
};
MIGRAPHX_REGISTER_OP(ck_gemm);

namespace {

bool is_ck_supported_type(shape::type_t t)
{
    return contains({shape::half_type, shape::int8_type, shape::int32_type}, t);
}

MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
{
    if(ins->name() != "dot" and ins->name() != "quant_dot")
        return false;
    if(not is_ck_supported_type(ins->get_shape().type()))
        return false;
    auto a = ins->inputs().front()->get_shape();
    auto b = ins->inputs().back()->get_shape();
    auto m = a.lens()[a.lens().size() - 2];
    auto n = b.lens().back();
    auto k = a.lens().back();
    // Integer gemms must be divisible by 4 in ck
    if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type()))
    {
        if(m % 4 != 0)
            return false;
        if(n % 4 != 0)
            return false;
        if(k % 4 != 0)
            return false;
    }
    // Skipping GEMMs with a K dimension greater than 2048 is a coarse-grained strategy
    // to avoid poor-performing GEMM kernels from CK
    // To-do: Investigate a more precise strategy
    return k <= 2048;
}

struct find_ck_gemm_pointwise
{
    // Find a gemm followed by a pointwise operation.
    auto matcher() const
    {
        auto gemm = match::skip(match::name("contiguous"))(
            match::name("dot", "quant_dot")(is_ck_gemm().bind("gemm")));
        return match::name("pointwise")(match::any_of[match::inputs()](gemm.bind("x")));
    }

    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto ins      = r.result;
        auto gemm_ins = r.instructions["gemm"];
        auto x_ins    = r.instructions["x"]; // input after contiguous
        auto* pm      = ins->module_inputs().front();
        auto names    = pm->get_parameter_names();
        std::sort(names.begin(), names.end());
        auto inputs   = ins->inputs();
        auto gemm_it  = std::find(inputs.begin(), inputs.end(), x_ins);
        auto gemm_idx = gemm_it - inputs.begin();
        if(gemm_ins->get_shape().type() != shape::int32_type and
           ins->get_shape().type() != gemm_ins->get_shape().type())
            return;
        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) {
               return not is_ck_supported_type(input->get_shape().type());
           }))
            return;
        assert(gemm_it != inputs.end());
        if(gemm_idx != 0)
        {
            auto first_param    = pm->get_parameter(names[0]);
            auto gemm_param     = pm->get_parameter(names[gemm_idx]);
            auto new_gemm_param = pm->add_parameter(names[0] + "_0", gemm_param->get_shape());
            auto new_first_param =
                pm->add_parameter(names[gemm_idx] + "_0", first_param->get_shape());
            pm->replace_instruction(gemm_param, new_gemm_param);
            pm->replace_instruction(first_param, new_first_param);
            pm->remove_instruction(first_param);
            pm->remove_instruction(gemm_param);
        }
        inputs.erase(gemm_it);
        inputs.insert(inputs.begin(), gemm_ins->inputs().begin(), gemm_ins->inputs().end());
        mpm.get_module().replace_instruction(
            ins, ck_gemm{gemm_ins->get_operator()}, inputs, {pm});
    }
};

struct find_ck_gemm
{
    auto matcher() const { return match::name("dot")(is_ck_gemm().bind("gemm")); }

    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto ins = r.result;
        mpm.get_module().replace_instruction(ins, ck_gemm{ins->get_operator()}, ins->inputs());
    }
};

} // namespace

void fuse_ck::apply(module_pass_manager& mpm) const
{
    match::find_matches(mpm, find_ck_gemm_pointwise{});
    match::find_matches(mpm, find_ck_gemm{});
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
```
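`check_gemm_shape` rejects an input unless a unit stride appears among its last three strides, i.e. one of the trailing dimensions is packed the way CK's GEMM kernels expect. A standalone illustration of that test, with plain stride vectors standing in for MIGraphX's `shape`:

```cpp
// Illustration of the unit-stride check in check_gemm_shape above.
#include <algorithm>
#include <iostream>
#include <vector>

bool last_three_has_unit_stride(const std::vector<int>& strides)
{
    auto first = strides.rbegin();
    auto last  = strides.rbegin() + 3; // assumes at least three dimensions
    return std::find(first, last, 1) != last;
}

int main()
{
    // Row-major strides of a contiguous 2x3x4 tensor: {12, 4, 1}
    std::cout << last_three_has_unit_stride({12, 4, 1}) << '\n'; // 1: valid for ck_gemm
    // A fully broadcast tensor has all-zero strides
    std::cout << last_three_has_unit_stride({0, 0, 0}) << '\n'; // 0: rejected
}
```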
src/targets/gpu/fuse_mlir.cpp

```diff
@@ -38,6 +38,27 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_MLIR);
 
+bool mlir_enabled()
+{
+#ifdef MIGRAPHX_MLIR
+    const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
+    if(mlir_enabled)
+    {
+        return true;
+    }
+    else
+    {
+        std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
+                     "var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
+                  << std::endl;
+        return false;
+    }
+#else
+    return false;
+#endif
+}
+
 #ifdef MIGRAPHX_MLIR
 
 struct mlir_op
@@ -58,8 +79,41 @@ struct mlir_op
             MIGRAPHX_THROW("should have one submodule.");
         if(inputs.size() < 2)
             MIGRAPHX_THROW("should have at least two inputs.");
-        auto n = inputs.size();
-        return op.compute_shape({inputs[n - 2], inputs[n - 1]});
+        module_ref mod = mods[0];
+        auto type      = mod->get_output_shapes().front().type();
+        std::unordered_map<instruction_ref, shape> ins_shapes;
+        size_t param_cnt               = 0;
+        std::vector<std::string> names = mod->get_parameter_names();
+        std::sort(names.begin(), names.end());
+        for(const std::string& param_name : names)
+        {
+            ins_shapes[mod->get_parameter(param_name)] = inputs[param_cnt++];
+        }
+        for(auto ins : iterator_for(*mod))
+        {
+            if(ins->name() == "@param")
+            {
+                continue;
+            }
+            if(ins->name() == "@literal")
+            {
+                ins_shapes[ins] = ins->get_shape();
+                continue;
+            }
+            if(ins->name() == "@return")
+            {
+                return ins_shapes[ins->inputs().at(0)].with_type(type);
+            }
+            std::vector<shape> input_shapes;
+            input_shapes.resize(ins->inputs().size());
+            std::transform(ins->inputs().begin(),
+                           ins->inputs().end(),
+                           input_shapes.begin(),
+                           [&](auto in) { return ins_shapes[in]; });
+            ins_shapes[ins] = ins->get_operator().compute_shape(input_shapes);
+        }
+        MIGRAPHX_THROW("No return found in the submodule");
     }
 };
 MIGRAPHX_REGISTER_OP(mlir_op);
@@ -68,7 +122,7 @@ namespace {
 MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
 {
-    if(ins->name() != "convolution")
+    if(ins->name() != "convolution" and ins->name() != "quant_convolution")
         return false;
     value v    = ins->get_operator().to_value();
     auto group = v.at("group").to<int>();
@@ -85,10 +139,126 @@ struct find_mlir_op
     auto matcher() const
     {
         auto dot_or_conv = match::skip(match::name("contiguous"))(
-            match::any_of(match::name("dot"), is_mlir_conv()).bind("gemm_based_op"));
+            match::any_of(match::name("dot"), match::name("quant_dot"), is_mlir_conv())
+                .bind("gemm_based_op"));
         return match::name("pointwise")(match::any_of[match::inputs()](dot_or_conv.bind("x")));
     }
 
+    std::unordered_map<instruction_ref, instruction_ref>
+    create_param_map_with_literals(module_ref mm, const module* pm, const shape& shape) const
+    {
+        std::unordered_map<instruction_ref, instruction_ref> ins_map;
+        for(auto ins : iterator_for(*pm))
+        {
+            if(ins->name() != "@literal")
+            {
+                continue;
+            }
+            literal r               = ins->get_literal();
+            instruction_ref literal = mm->add_literal(r);
+            instruction_ref mbcast  = mm->add_instruction(
+                make_op("multibroadcast", {{"out_lens", shape.lens()}}), literal);
+            ins_map[ins] = mbcast;
+        }
+        return ins_map;
+    }
+
+    std::tuple<instruction_ref, std::vector<instruction_ref>>
+    fuse_input_ops_and_gemm_based_op(module_ref mm, instruction_ref gemm_based_op) const
+    {
+        std::vector<instruction_ref> top_inputs;
+        std::vector<instruction_ref> imm_inputs;
+        size_t input_cnt = 0;
+        for(instruction_ref input : gemm_based_op->inputs())
+        {
+            std::vector<operation> op_stream;
+            while(contains({"slice", "transpose", "contiguous", "reshape"}, input->name()))
+            {
+                op_stream.push_back(input->get_operator());
+                input = input->inputs().at(0);
+            }
+            top_inputs.push_back(input);
+            instruction_ref prev_input =
+                mm->add_parameter("y" + std::to_string(input_cnt++), input->get_shape());
+            for(const auto& op : reverse(op_stream))
+            {
+                prev_input = mm->add_instruction(op, {prev_input});
+            }
+            imm_inputs.push_back(prev_input);
+        }
+        instruction_ref new_gemm_based_op =
+            mm->add_instruction(gemm_based_op->get_operator(), imm_inputs);
+        return {new_gemm_based_op, top_inputs};
+    }
+
+    // Whitelist supported fusion options, including imposing type constraints
+    // for cases where MLIR only supports an operation (usually a pointwise function)
+    // on particular types.
+    bool is_pointwise_op_supported_by_mlir(const instruction& i) const
+    {
+        using type_t                                      = shape::type_t;
+        const auto& name                                  = i.name();
+        const auto result_type                            = i.get_shape().type();
+        const std::initializer_list<type_t> allowed_types = {type_t::float_type,
+                                                             type_t::half_type,
+                                                             type_t::int8_type,
+                                                             type_t::int32_type,
+                                                             type_t::bool_type};
+        // Preliminary type check.
+        if(not contains(allowed_types, result_type))
+        {
+            return false;
+        }
+        const std::initializer_list<std::string> any_type_ops = {"@literal", "@param", "@return"};
+        const std::initializer_list<std::string> no_bool_ops  = {
+            "convolution",
+            "quant_convolution",
+            "dot",
+            "quant_dot",
+            "add",
+            "clip",
+            "relu",
+            "sub",
+            "mul",
+            "div",
+            "pow",
+            "where",
+            "quantizelinear",
+            "dequantizelinear",
+            "abs",
+            "neg",
+        };
+        const std::initializer_list<std::string> fp_only_ops = {
+            "ceil",
+            "erf",
+            "exp",
+            "floor",
+            "log",
+            "recip",
+            "rsqrt",
+            // There are bugs in MLIR right now for models using sigmoid so disable it for now
+            // "sigmoid",
+            "softmax",
+            "tanh",
+        };
+        bool is_float = contains({type_t::float_type, type_t::half_type}, result_type);
+        if(contains(any_type_ops, name))
+            return true;
+        if(result_type != type_t::bool_type and contains(no_bool_ops, name))
+            return true;
+        if(is_float and contains(fp_only_ops, name))
+            return true;
+        // Only conversions between floating types are known to be unambiguously
+        // supported.
+        if(is_float and name == "convert")
+        {
+            return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) {
+                return contains({type_t::float_type, type_t::half_type},
+                                arg->get_shape().type());
+            });
+        }
+        return false;
+    }
+
     void apply(module_pass_manager& mpm, const match::matcher_result& r) const
     {
         auto ins = r.result;
@@ -96,35 +266,25 @@ struct find_mlir_op
         auto x_ins = r.instructions["x"]; // input after contiguous
         auto* pm   = ins->module_inputs().front();
         auto names = pm->get_parameter_names();
-        // Whitelist pointwise operators
-        if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
-               return not contains(
-                   {"@literal", "@param", "@return", "convolution", "dot", "add", "relu"},
-                   i.name());
-           }))
-            return;
-        // Only fuse with fp32/fp16
-        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
-               return not contains({shape::type_t::float_type, shape::type_t::half_type},
-                                   i->get_shape().type());
+        // Whitelist pointwise operators.
+        if(std::any_of(pm->begin(), pm->end(), [&](const auto& i) {
+               return not is_pointwise_op_supported_by_mlir(i);
            }))
             return;
         std::sort(names.begin(), names.end());
         module_ref mm = mpm.create_module("mlir_" + pm->name());
         mm->set_bypass();
-        std::unordered_map<instruction_ref, instruction_ref> param_map;
-        auto x    = mm->add_parameter("x" + std::to_string(names.size()),
-                                      gemm_based_op->inputs().at(0)->get_shape());
-        auto w    = mm->add_parameter("x" + std::to_string(names.size() + 1),
-                                      gemm_based_op->inputs().at(1)->get_shape());
-        auto conv = mm->add_instruction(gemm_based_op->get_operator(), {x, w});
+        std::unordered_map<instruction_ref, instruction_ref> param_map =
+            create_param_map_with_literals(mm, pm, gemm_based_op->get_shape());
+        auto [anchor_op, top_inputs] = fuse_input_ops_and_gemm_based_op(mm, gemm_based_op);
         std::transform(names.begin(),
                        names.end(),
                        ins->inputs().begin(),
                        std::inserter(param_map, param_map.end()),
-                       [&](auto name, auto input) {
+                       [&, &anchor_op = anchor_op](auto name, auto input) {
                            if(input == x_ins)
-                               return std::make_pair(pm->get_parameter(name), conv);
+                               return std::make_pair(pm->get_parameter(name), anchor_op);
                            return std::make_pair(pm->get_parameter(name),
                                                  mm->add_parameter(name, input->get_shape()));
                        });
@@ -135,7 +295,7 @@ struct find_mlir_op
                      ins->inputs().end(),
                      std::back_inserter(inputs),
                      [&](auto input) { return input != gemm_based_op; });
-        inputs.insert(
-            inputs.end(), gemm_based_op->inputs().begin(), gemm_based_op->inputs().end());
+        inputs.insert(inputs.end(), top_inputs.begin(), top_inputs.end());
         mpm.get_module().replace_instruction(
             ins, mlir_op{gemm_based_op->get_operator()}, inputs, {mm});
     }
@@ -148,17 +308,7 @@ struct find_mlir_op
 void fuse_mlir::apply(module_pass_manager& mpm) const
 {
 #ifdef MIGRAPHX_MLIR
-    const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
-    if(mlir_enabled)
-    {
-        match::find_matches(mpm, find_mlir_op{});
-    }
-    else
-    {
-        std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
-                     "var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
-                  << std::endl;
-    }
+    match::find_matches(mpm, find_mlir_op{});
 #else
     (void)mpm;
 #endif
```
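The new `mlir_op::compute_shape` no longer derives its result from just the last two inputs; it seeds the submodule's parameters with the fused inputs' shapes (matched by sorted parameter name) and folds shapes forward until it reaches `@return`. A toy version of that traversal, where a shape is just a dims vector and every op is elementwise (an assumption for brevity, not MIGraphX's real per-op shape rules):

```cpp
// Toy forward shape propagation over a small instruction graph.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

struct node
{
    std::string name;           // "@param", "add", "relu", "@return", ...
    std::vector<int> input_ids; // indices of producer nodes
};

using dims = std::vector<int>;

dims propagate(const std::vector<node>& graph, const std::vector<dims>& param_shapes)
{
    std::map<int, dims> shapes;
    std::size_t param_cnt = 0;
    for(int id = 0; id < static_cast<int>(graph.size()); ++id)
    {
        const node& n = graph[id];
        if(n.name == "@param")
        {
            shapes[id] = param_shapes.at(param_cnt++); // seed from caller-supplied shapes
            continue;
        }
        if(n.name == "@return")
            return shapes.at(n.input_ids.at(0));
        // Elementwise op: every input shares one shape, which the output keeps.
        dims d = shapes.at(n.input_ids.at(0));
        for(int in : n.input_ids)
            assert(shapes.at(in) == d);
        shapes[id] = d;
    }
    throw std::runtime_error("No return found in the submodule");
}

int main()
{
    std::vector<node> graph = {
        {"@param", {}}, {"@param", {}}, {"add", {0, 1}}, {"relu", {2}}, {"@return", {3}}};
    dims out = propagate(graph, {{2, 3}, {2, 3}});
    std::cout << out[0] << "x" << out[1] << '\n'; // prints 2x3
}
```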
src/targets/gpu/fuse_ops.cpp

```diff
@@ -165,7 +165,8 @@ struct fusion
 const std::unordered_set<std::string>& get_supported_archs()
 {
-    static std::unordered_set<std::string> supported_archs{
-        "gfx900", "gfx906", "gfx908", "gfx1030"};
+    static std::unordered_set<std::string> supported_archs{
+        "gfx900", "gfx906", "gfx908", "gfx1030", "gfx940"};
     return supported_archs;
 }
```
src/targets/gpu/gemm_impl.cpp

```diff
@@ -158,7 +158,15 @@ struct gemm_impl
         {
             beta = 0;
         }
+        if(arg_type == rocblas_datatype_f16_r)
+            compute_type = rocblas_datatype_f32_r;
     }
+
+    rocblas_gemm_flags flag = rocblas_gemm_flags_none;
+#if ROCBLAS_VERSION_MAJOR < 3
+    if(int8_x4_format)
+        flag = rocblas_gemm_flags_pack_int8x4;
+#endif
+
     // Create lambdas that will cast alpha, beta to the output shape's type
     // and retain the values being pointed to
     output_shape.visit_type([&](auto as) {
```
src/targets/gpu/hip.cpp

```diff
@@ -146,7 +146,11 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
     gpu_sync();
     std::vector<T> result(sz);
     assert(not is_device_ptr(result.data()));
-    assert(is_device_ptr(x));
+    if(not is_device_ptr(x))
+    {
+        MIGRAPHX_THROW(
+            "read_from_gpu() requires Src buffer to be on the GPU, Copy from gpu failed\n");
+    }
     auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
     if(status != hipSuccess)
         MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
```
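Replacing the `assert` with `MIGRAPHX_THROW` means a host pointer passed as the source now fails loudly in release builds too, matching how the subsequent `hipMemcpy` failure is already handled. A condensed sketch of this hardened read path (float-only and plain-exception based, for brevity):

```cpp
// Device-to-host copy with errors reported as exceptions rather than asserts.
#include <hip/hip_runtime.h>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<float> read_from_gpu(const void* x, std::size_t n)
{
    std::vector<float> result(n);
    hipError_t status = hipMemcpy(result.data(), x, n * sizeof(float), hipMemcpyDeviceToHost);
    if(status != hipSuccess)
        throw std::runtime_error("Copy from gpu failed: " +
                                 std::string(hipGetErrorString(status)));
    return result;
}
```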
src/targets/gpu/include/migraphx/gpu/allocation_model.hpp

```diff
@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
 #define MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
 
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/operation.hpp>
 #include <migraphx/instruction_ref.hpp>
 #include <string>
@@ -33,7 +33,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-struct gpu_allocation_model
+struct MIGRAPHX_GPU_EXPORT gpu_allocation_model
 {
     std::string name() const;
     std::string copy() const;
```
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp

```diff
@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
 #define MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
 
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/analyze_streams.hpp>
 
 namespace migraphx {
@@ -34,7 +34,7 @@ struct module;
 namespace gpu {
 
-std::vector<stream_race> analyze_streams(const module& m);
+MIGRAPHX_GPU_EXPORT std::vector<stream_race> analyze_streams(const module& m);
 
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
```
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp

```diff
@@ -24,9 +24,10 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
 #define MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
 
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/filesystem.hpp>
 #include <migraphx/compile_src.hpp>
 #include <migraphx/env.hpp>
+#include <migraphx/functional.hpp>
 #include <string>
 #include <utility>
@@ -36,6 +37,11 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
+#ifdef MIGRAPHX_USE_HIPRTC
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS);
+#endif
+
 struct hiprtc_src_file
 {
     hiprtc_src_file() = default;
@@ -52,14 +58,13 @@ struct hiprtc_src_file
     }
 };
 
-std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs,
-                                                           std::string params,
-                                                           const std::string& arch);
+MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src_with_hiprtc(
+    std::vector<hiprtc_src_file> srcs, std::string params, const std::string& arch);
 
-std::vector<std::vector<char>>
+MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>>
 compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
 
-std::string enum_params(std::size_t count, std::string param);
+MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param);
 
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
```
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp

```diff
@@ -24,8 +24,9 @@
 #ifndef MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP
 #define MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP
 
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/operation.hpp>
 #include <migraphx/compile_src.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -39,9 +40,10 @@ struct hip_compile_options
     std::size_t local;
     std::vector<shape> inputs;
     shape output;
-    std::string kernel_name           = "kernel";
-    std::string params                = "";
-    std::vector<shape> virtual_inputs = {};
+    std::string kernel_name                    = "kernel";
+    std::string params                         = "";
+    std::vector<shape> virtual_inputs          = {};
+    std::vector<src_file> additional_src_files = {};
 
     /**
      * @brief Set the launch parameters but allow v to override the values
@@ -64,14 +66,16 @@ struct hip_compile_options
 };
 
 /// Compute global for n elements, but max out on target-specific upper limit
-std::function<std::size_t(std::size_t local)>
+MIGRAPHX_GPU_EXPORT std::function<std::size_t(std::size_t local)>
 compute_global_for(context& ctx, std::size_t n, std::size_t over = 1);
 
-operation compile_hip_code_object(const std::string& content, hip_compile_options options);
+MIGRAPHX_GPU_EXPORT operation compile_hip_code_object(const std::string& content,
+                                                      hip_compile_options options);
 
-std::size_t compute_block_size(std::size_t n, std::size_t max_block_size = 1024);
+MIGRAPHX_GPU_EXPORT std::size_t compute_block_size(std::size_t n,
+                                                   std::size_t max_block_size = 1024);
 
-std::string generate_make_shape(const shape& s);
+MIGRAPHX_GPU_EXPORT std::string generate_make_shape(const shape& s);
 
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
```
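The header documents the intent of `compute_global_for`: compute a global size for `n` elements but max out at a target-specific upper limit. A hypothetical sketch of that calculation (the cap value here is invented; the real limit comes from the context and target):

```cpp
// Hypothetical "compute global, capped" calculation: round n up to a whole
// number of workgroups, then clamp to an upper limit.
#include <algorithm>
#include <cstddef>
#include <iostream>

std::size_t compute_global(std::size_t n, std::size_t local, std::size_t max_global = 1u << 20)
{
    std::size_t groups = (n + local - 1) / local; // ceil(n / local)
    return std::min<std::size_t>(groups * local, max_global);
}

int main()
{
    std::cout << compute_global(1000, 256) << '\n';     // 1024
    std::cout << compute_global(1u << 24, 256) << '\n'; // clamped to 1048576
}
```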