Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
ffcb68b4
Commit
ffcb68b4
authored
Oct 23, 2023
by
Manupa Karunaratne
Browse files
Merge branch 'develop' of
https://github.com/ROCmSoftwarePlatform/AMDMIGraphX
into mlir-attention
parents
ee88607c
7604ecf5
Changes
115
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
191 additions
and
76 deletions
+191
-76
src/program.cpp
src/program.cpp
+1
-1
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+18
-3
src/targets/gpu/argmax.cpp
src/targets/gpu/argmax.cpp
+3
-2
src/targets/gpu/argmin.cpp
src/targets/gpu/argmin.cpp
+3
-2
src/targets/gpu/compile_hip_code_object.cpp
src/targets/gpu/compile_hip_code_object.cpp
+19
-7
src/targets/gpu/compile_ops.cpp
src/targets/gpu/compile_ops.cpp
+22
-6
src/targets/gpu/device/argmax.cpp
src/targets/gpu/device/argmax.cpp
+10
-3
src/targets/gpu/device/argmin.cpp
src/targets/gpu/device/argmin.cpp
+10
-3
src/targets/gpu/fuse_ck.cpp
src/targets/gpu/fuse_ck.cpp
+18
-3
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+3
-3
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
+39
-3
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
+3
-2
src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
+3
-2
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
+33
-30
test/eliminate_allocation_test.cpp
test/eliminate_allocation_test.cpp
+1
-1
test/eliminate_concat_test.cpp
test/eliminate_concat_test.cpp
+2
-2
test/memory_coloring_test.cpp
test/memory_coloring_test.cpp
+1
-1
test/normalize_ops_test.cpp
test/normalize_ops_test.cpp
+1
-1
test/onnx/.onnxrt-commit
test/onnx/.onnxrt-commit
+1
-1
test/onnx/argmax_select_last_index_test.onnx
test/onnx/argmax_select_last_index_test.onnx
+0
-0
No files found.
src/program.cpp
View file @
ffcb68b4
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
os
<<
std
::
endl
;
os
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"/sec"
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"
inferences
/sec"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
...
...
src/rewrite_quantization.cpp
View file @
ffcb68b4
...
@@ -33,6 +33,8 @@
...
@@ -33,6 +33,8 @@
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_CK_WORKAROUNDS
);
void
apply_quantizelinear
(
module
&
m
,
instruction_ref
ins
)
void
apply_quantizelinear
(
module
&
m
,
instruction_ref
ins
)
{
{
assert
(
ins
->
name
()
==
"quantizelinear"
);
assert
(
ins
->
name
()
==
"quantizelinear"
);
...
@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant
=
qt
.
max
();
max_quant
=
qt
.
max
();
min_quant
=
qt
.
min
();
min_quant
=
qt
.
min
();
});
});
auto
s
=
add_zero_point
->
get_shape
();
auto
s
=
add_zero_point
->
get_shape
();
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
instruction_ref
min_arg
;
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
instruction_ref
max_arg
;
if
(
enabled
(
MIGRAPHX_ENABLE_CK_WORKAROUNDS
{}))
{
std
::
vector
<
int
>
min_data
(
s
.
elements
(),
min_quant
);
std
::
vector
<
int
>
max_data
(
s
.
elements
(),
max_quant
);
min_arg
=
m
.
add_literal
(
literal
(
s
,
min_data
));
max_arg
=
m
.
add_literal
(
literal
(
s
,
max_data
));
}
else
{
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
}
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
m
.
replace_instruction
(
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
...
...
src/targets/gpu/argmax.cpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -40,7 +40,8 @@ argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argum
...
@@ -40,7 +40,8 @@ argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argum
{
{
auto
n_dim
=
args
.
front
().
get_shape
().
lens
().
size
();
auto
n_dim
=
args
.
front
().
get_shape
().
lens
().
size
();
int64_t
tuned_axis
=
tune_axis
(
n_dim
,
op
.
axis
,
op
.
name
());
int64_t
tuned_axis
=
tune_axis
(
n_dim
,
op
.
axis
,
op
.
name
());
device
::
argmax
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
tuned_axis
);
device
::
argmax
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
tuned_axis
,
op
.
select_last_index
);
return
args
.
back
();
return
args
.
back
();
}
}
...
...
src/targets/gpu/argmin.cpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -40,7 +40,8 @@ argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argum
...
@@ -40,7 +40,8 @@ argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argum
{
{
auto
n_dim
=
args
.
front
().
get_shape
().
lens
().
size
();
auto
n_dim
=
args
.
front
().
get_shape
().
lens
().
size
();
int64_t
tuned_axis
=
tune_axis
(
n_dim
,
op
.
axis
,
op
.
name
());
int64_t
tuned_axis
=
tune_axis
(
n_dim
,
op
.
axis
,
op
.
name
());
device
::
argmin
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
tuned_axis
);
device
::
argmin
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
tuned_axis
,
op
.
select_last_index
);
return
args
.
back
();
return
args
.
back
();
}
}
...
...
src/targets/gpu/compile_hip_code_object.cpp
View file @
ffcb68b4
...
@@ -139,6 +139,12 @@ void hip_compile_options::set_launch_params(
...
@@ -139,6 +139,12 @@ void hip_compile_options::set_launch_params(
global
=
compute_global
(
local
);
global
=
compute_global
(
local
);
}
}
static
bool
hip_accept_non_uniform_wg
()
{
static
bool
non_uniform_wg
=
hip_has_flags
({
"-fno-offload-uniform-block"
});
return
non_uniform_wg
;
}
std
::
function
<
std
::
size_t
(
std
::
size_t
local
)
>
std
::
function
<
std
::
size_t
(
std
::
size_t
local
)
>
compute_global_for
(
context
&
ctx
,
std
::
size_t
n
,
std
::
size_t
over
)
compute_global_for
(
context
&
ctx
,
std
::
size_t
n
,
std
::
size_t
over
)
{
{
...
@@ -146,13 +152,14 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
...
@@ -146,13 +152,14 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std
::
size_t
max_global
=
ctx
.
get_current_device
().
get_cu_count
()
*
std
::
size_t
max_global
=
ctx
.
get_current_device
().
get_cu_count
()
*
ctx
.
get_current_device
().
get_max_workitems_per_cu
();
ctx
.
get_current_device
().
get_max_workitems_per_cu
();
return
[
n
,
over
,
max_global
](
std
::
size_t
local
)
{
return
[
n
,
over
,
max_global
](
std
::
size_t
local
)
{
// hip require global workitems multiple of local workitems. It may degrade performance.
std
::
size_t
num_elements
=
n
;
// [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
if
(
not
hip_accept_non_uniform_wg
())
// https://reviews.llvm.org/D155213
{
std
::
size_t
num_elements
=
((
n
+
local
-
1
)
/
local
)
*
local
;
num_elements
=
(
1
+
(
n
-
1
)
/
local
)
*
local
;
std
::
size_t
groups
=
(
num_elements
+
local
-
1
)
/
local
;
}
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
groups
=
1
+
(
num_elements
-
1
)
/
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
return
std
::
min
(
nglobal
,
num_elements
);
return
std
::
min
(
nglobal
,
num_elements
);
};
};
}
}
...
@@ -183,6 +190,11 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
...
@@ -183,6 +190,11 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
generate_args_hpp
(
options
.
virtual_inputs
.
empty
()
?
options
.
inputs
:
options
.
virtual_inputs
);
generate_args_hpp
(
options
.
virtual_inputs
.
empty
()
?
options
.
inputs
:
options
.
virtual_inputs
);
srcs
.
emplace_back
(
"args.hpp"
,
args_hpp
);
srcs
.
emplace_back
(
"args.hpp"
,
args_hpp
);
if
(
options
.
global
%
options
.
local
!=
0
and
hip_accept_non_uniform_wg
())
options
.
params
+=
" -fno-offload-uniform-block"
;
else
assert
(
options
.
global
%
options
.
local
==
0
);
options
.
params
+=
" -DMIGRAPHX_NGLOBAL="
+
std
::
to_string
(
options
.
global
);
options
.
params
+=
" -DMIGRAPHX_NGLOBAL="
+
std
::
to_string
(
options
.
global
);
options
.
params
+=
" -DMIGRAPHX_NLOCAL="
+
std
::
to_string
(
options
.
local
);
options
.
params
+=
" -DMIGRAPHX_NLOCAL="
+
std
::
to_string
(
options
.
local
);
options
.
params
+=
" "
+
join_strings
(
compiler_warnings
(),
" "
);
options
.
params
+=
" "
+
join_strings
(
compiler_warnings
(),
" "
);
...
...
src/targets/gpu/compile_ops.cpp
View file @
ffcb68b4
...
@@ -37,6 +37,7 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -37,6 +37,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_COMPILE_PARALLEL
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_COMPILE_PARALLEL
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_BENCHMARKING
);
struct
precompile_op
struct
precompile_op
{
{
...
@@ -179,14 +180,29 @@ struct compile_plan
...
@@ -179,14 +180,29 @@ struct compile_plan
MIGRAPHX_THROW
(
"Multiple kernels without config"
);
MIGRAPHX_THROW
(
"Multiple kernels without config"
);
std
::
cout
<<
"Benchmarking "
<<
preop
.
name
()
<<
": "
<<
results
.
size
()
<<
" configs"
std
::
cout
<<
"Benchmarking "
<<
preop
.
name
()
<<
": "
<<
results
.
size
()
<<
" configs"
<<
std
::
endl
;
<<
std
::
endl
;
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{}))
std
::
cout
<<
"Problem: "
<<
config
->
problem
<<
std
::
endl
;
std
::
vector
<
double
>
times
;
std
::
vector
<
double
>
times
;
times
.
reserve
(
results
.
size
());
times
.
reserve
(
results
.
size
());
std
::
transform
(
std
::
transform
(
results
.
begin
(),
results
.
begin
(),
results
.
end
(),
std
::
back_inserter
(
times
),
[
&
](
const
auto
&
cr
)
{
results
.
end
(),
if
(
not
cr
.
has_value
())
config
->
solutions
.
begin
(),
return
std
::
numeric_limits
<
double
>::
max
();
std
::
back_inserter
(
times
),
return
time_op
(
*
ctx
,
cr
->
replace
.
code_object
,
to_shapes
(
cr
->
ins
->
inputs
()),
20
);
[
&
](
const
auto
&
cr
,
const
auto
&
solution
)
{
});
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{}))
std
::
cout
<<
"Benchmarking solution: "
<<
solution
<<
std
::
endl
;
if
(
not
cr
.
has_value
())
{
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{}))
std
::
cout
<<
"No binary"
<<
std
::
endl
;
return
std
::
numeric_limits
<
double
>::
max
();
}
auto
t
=
time_op
(
*
ctx
,
cr
->
replace
.
code_object
,
to_shapes
(
cr
->
ins
->
inputs
()),
20
);
if
(
enabled
(
MIGRAPHX_TRACE_BENCHMARKING
{}))
std
::
cout
<<
t
<<
"ms"
<<
std
::
endl
;
return
t
;
});
auto
i
=
std
::
distance
(
times
.
begin
(),
std
::
min_element
(
times
.
begin
(),
times
.
end
()));
auto
i
=
std
::
distance
(
times
.
begin
(),
std
::
min_element
(
times
.
begin
(),
times
.
end
()));
std
::
cout
<<
"Fastest solution: "
<<
config
->
solutions
.
at
(
i
)
<<
std
::
endl
;
std
::
cout
<<
"Fastest solution: "
<<
config
->
solutions
.
at
(
i
)
<<
std
::
endl
;
pc
.
insert
(
preop
.
name
(),
config
->
problem
,
config
->
solutions
.
at
(
i
));
pc
.
insert
(
preop
.
name
(),
config
->
problem
,
config
->
solutions
.
at
(
i
));
...
...
src/targets/gpu/device/argmax.cpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -34,9 +34,16 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -34,9 +34,16 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
argmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
)
void
argmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
,
bool
select_last_index
)
{
{
arg_op
(
argmax_op
{},
stream
,
result
,
arg
,
axis
);
if
(
select_last_index
)
arg_op
(
argmax_op_last_index
{},
stream
,
result
,
arg
,
axis
);
else
arg_op
(
argmax_op_first_index
{},
stream
,
result
,
arg
,
axis
);
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/argmin.cpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -34,9 +34,16 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -34,9 +34,16 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
argmin
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
)
void
argmin
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
,
bool
select_last_index
)
{
{
arg_op
(
argmin_op
{},
stream
,
result
,
arg
,
axis
);
if
(
select_last_index
)
arg_op
(
argmin_op_last_index
{},
stream
,
result
,
arg
,
axis
);
else
arg_op
(
argmin_op_first_index
{},
stream
,
result
,
arg
,
axis
);
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/fuse_ck.cpp
View file @
ffcb68b4
...
@@ -26,6 +26,7 @@
...
@@ -26,6 +26,7 @@
#include <migraphx/matcher.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/device_name.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
...
@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
auto
m
=
a
.
lens
()[
a
.
lens
().
size
()
-
2
];
auto
m
=
a
.
lens
()[
a
.
lens
().
size
()
-
2
];
auto
n
=
b
.
lens
().
back
();
auto
n
=
b
.
lens
().
back
();
auto
k
=
a
.
lens
().
back
();
auto
k
=
a
.
lens
().
back
();
auto
batch_size
=
std
::
accumulate
(
a
.
lens
().
rbegin
()
+
2
,
a
.
lens
().
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
// Integer gemms must be divisible by 4 in ck
// Integer gemms must be divisible by 4 in ck
if
(
contains
({
shape
::
int8_type
,
shape
::
int32_type
},
ins
->
get_shape
().
type
()))
if
(
contains
({
shape
::
int8_type
,
shape
::
int32_type
},
ins
->
get_shape
().
type
()))
{
{
...
@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
...
@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
if
(
k
%
4
!=
0
)
if
(
k
%
4
!=
0
)
return
false
;
return
false
;
}
}
// Skipping GEMMs with a K dimension greater than 2048 is a course-grained strategy
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
// to avoid poor-performing GEMM kernels from CK
if
(
device_name
==
"gfx940"
)
// To-do: Investigate a more precise strategy
{
if
(
ins
->
get_shape
().
type
()
==
shape
::
half_type
)
{
if
(
batch_size
>=
64
)
return
m
<
2048
or
k
<=
64
or
n
<=
384
or
n
>=
2048
;
return
true
;
}
return
true
;
}
return
k
<=
2048
;
return
k
<=
2048
;
}
}
...
@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise
...
@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise
return
not
input
->
inputs
().
empty
()
and
input
->
inputs
().
front
()
->
name
()
==
"capture"
;
return
not
input
->
inputs
().
empty
()
and
input
->
inputs
().
front
()
->
name
()
==
"capture"
;
}))
}))
return
;
return
;
if
(
std
::
any_of
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
[](
auto
input
)
{
return
not
input
->
inputs
().
empty
()
and
input
->
inputs
().
front
()
->
name
()
==
"capture"
;
}))
return
;
assert
(
gemm_it
!=
inputs
.
end
());
assert
(
gemm_it
!=
inputs
.
end
());
if
(
gemm_idx
!=
0
)
if
(
gemm_idx
!=
0
)
{
{
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
ffcb68b4
...
@@ -199,9 +199,9 @@ struct miopen_convolution
...
@@ -199,9 +199,9 @@ struct miopen_convolution
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
preallocate
=
true
;
preallocate
=
true
;
#endif
#endif
auto
x
=
preallocate
?
to_gpu
(
generate_argument
(
x_shape
))
:
inputs
[
0
];
auto
x
=
preallocate
?
to_gpu
(
generate_argument
(
x_shape
))
:
argument
{
inputs
[
0
]
}
;
auto
w
=
preallocate
?
to_gpu
(
generate_argument
(
w_shape
))
:
inputs
[
1
];
auto
w
=
preallocate
?
to_gpu
(
generate_argument
(
w_shape
))
:
argument
{
inputs
[
1
]
}
;
auto
y
=
preallocate
?
allocate_gpu
(
output_shape
)
:
inputs
[
2
];
auto
y
=
preallocate
?
allocate_gpu
(
output_shape
)
:
argument
{
inputs
[
2
]
}
;
auto
workspace
=
auto
workspace
=
preallocate
?
allocate_gpu
(
workspace_shape
)
:
migraphx
::
argument
(
workspace_shape
);
preallocate
?
allocate_gpu
(
workspace_shape
)
:
migraphx
::
argument
(
workspace_shape
);
...
...
src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -55,7 +55,7 @@ MIGRAPHX_DEVICE_CONSTEXPR val_index<T> make_val_index(T v, int64_t i)
...
@@ -55,7 +55,7 @@ MIGRAPHX_DEVICE_CONSTEXPR val_index<T> make_val_index(T v, int64_t i)
return
{
v
,
i
};
return
{
v
,
i
};
}
}
struct
argmax_op
struct
argmax_op
_first_index
{
{
template
<
class
T
>
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
val_index
<
T
>
operator
()(
val_index
<
T
>
x
,
val_index
<
T
>
y
)
const
MIGRAPHX_DEVICE_CONSTEXPR
val_index
<
T
>
operator
()(
val_index
<
T
>
x
,
val_index
<
T
>
y
)
const
...
@@ -73,7 +73,25 @@ struct argmax_op
...
@@ -73,7 +73,25 @@ struct argmax_op
MIGRAPHX_DEVICE_CONSTEXPR
auto
init
()
const
{
return
lowest
();
}
MIGRAPHX_DEVICE_CONSTEXPR
auto
init
()
const
{
return
lowest
();
}
};
};
struct
argmin_op
struct
argmax_op_last_index
{
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
val_index
<
T
>
operator
()(
val_index
<
T
>
x
,
val_index
<
T
>
y
)
const
{
if
(
x
.
val
>
y
.
val
)
return
x
;
else
if
(
x
.
val
<
y
.
val
)
return
y
;
else
{
return
(
x
.
index
>
y
.
index
)
?
x
:
y
;
}
}
MIGRAPHX_DEVICE_CONSTEXPR
auto
init
()
const
{
return
lowest
();
}
};
struct
argmin_op_first_index
{
{
template
<
class
T
>
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
val_index
<
T
>
operator
()(
val_index
<
T
>
x
,
val_index
<
T
>
y
)
const
MIGRAPHX_DEVICE_CONSTEXPR
val_index
<
T
>
operator
()(
val_index
<
T
>
x
,
val_index
<
T
>
y
)
const
...
@@ -91,6 +109,24 @@ struct argmin_op
...
@@ -91,6 +109,24 @@ struct argmin_op
MIGRAPHX_DEVICE_CONSTEXPR
auto
init
()
const
{
return
highest
();
}
MIGRAPHX_DEVICE_CONSTEXPR
auto
init
()
const
{
return
highest
();
}
};
};
struct
argmin_op_last_index
{
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
val_index
<
T
>
operator
()(
val_index
<
T
>
x
,
val_index
<
T
>
y
)
const
{
if
(
x
.
val
<
y
.
val
)
return
x
;
else
if
(
x
.
val
>
y
.
val
)
return
y
;
else
{
return
(
x
.
index
>
y
.
index
)
?
x
:
y
;
}
}
MIGRAPHX_DEVICE_CONSTEXPR
auto
init
()
const
{
return
highest
();
}
};
template
<
class
Op
>
template
<
class
Op
>
void
arg_op
(
Op
op
,
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
)
void
arg_op
(
Op
op
,
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
)
{
{
...
...
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -36,7 +36,8 @@ namespace device {
...
@@ -36,7 +36,8 @@ namespace device {
void
MIGRAPHX_DEVICE_EXPORT
argmax
(
hipStream_t
stream
,
void
MIGRAPHX_DEVICE_EXPORT
argmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
result
,
const
argument
&
arg
,
const
argument
&
arg
,
int64_t
axis
);
int64_t
axis
,
bool
select_last_index
);
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
View file @
ffcb68b4
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -36,7 +36,8 @@ namespace device {
...
@@ -36,7 +36,8 @@ namespace device {
void
MIGRAPHX_DEVICE_EXPORT
argmin
(
hipStream_t
stream
,
void
MIGRAPHX_DEVICE_EXPORT
argmin
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
result
,
const
argument
&
arg
,
const
argument
&
arg
,
int64_t
axis
);
int64_t
axis
,
bool
select_last_index
);
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
View file @
ffcb68b4
...
@@ -31,6 +31,14 @@
...
@@ -31,6 +31,14 @@
#include <migraphx/kernels/debug.hpp>
#include <migraphx/kernels/debug.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/functional.hpp>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-identifier"
extern
"C"
__device__
size_t
__ockl_get_enqueued_local_size
(
uint
);
// NOLINT
extern
"C"
__device__
size_t
__ockl_get_local_size
(
uint
);
// NOLINT
#pragma clang diagnostic pop
#endif
namespace
migraphx
{
namespace
migraphx
{
#if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
#if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
...
@@ -45,43 +53,37 @@ inline __device__ __attribute__((const)) index_int compute_global_size()
...
@@ -45,43 +53,37 @@ inline __device__ __attribute__((const)) index_int compute_global_size()
// This actualy works even when global is not divisible by local size.
// This actualy works even when global is not divisible by local size.
// This doesnt actually do a multiplicatiosn. Instead it calls a device
// This doesnt actually do a multiplicatiosn. Instead it calls a device
// function to get the global size, which is why it works.
// function to get the global size, which is why it works.
return
blockDim
.
x
*
gridDim
.
x
;
// NOLINT
return
blockDim
.
x
*
gridDim
.
x
;
// NOLINT
#endif
#endif
}
}
// We cant just use blockDim.x to get the local size since its broken on hip
#ifdef MIGRAPHX_NGROUP
// when global is not divisible by local size. In this case, we calulate the
// If global is divisible by local then local can be a const
// size for the last group.
#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
#define MIGRAPHX_HAS_CONST_LOCAL 1
#endif
#endif
inline
__device__
__attribute__
((
const
))
index_int
compute_local_size
()
inline
__device__
__attribute__
((
const
))
index_int
compute_local_size
()
{
{
#ifdef MIGRAPHX_NLOCAL
#ifdef MIGRAPHX_HAS_CONST_LOCAL
const
auto
nlocal
=
MIGRAPHX_NLOCAL
;
return
MIGRAPHX_NLOCAL
;
#else
const
auto
nlocal
=
blockDim
.
x
;
// NOLINT
#endif
#ifdef MIGRAPHX_NGROUP
const
auto
ngroup
=
MIGRAPHX_NGROUP
;
#else
#else
const
auto
ngroup
=
gridDim
.
x
;
// NOLINT
// Returns block size. For the non-uniform block it returns the size of the non-uniform block.
return
__ockl_get_local_size
(
0
);
// NOLINT
#endif
#endif
const
auto
group_id
=
blockIdx
.
x
;
// NOLINT
const
auto
nglobal
=
compute_global_size
();
if
(
group_id
==
ngroup
-
1
)
{
return
1
+
(
nglobal
-
1
)
%
nlocal
;
}
else
{
return
nlocal
;
// NOLINT
}
}
}
#ifdef MIGRAPHX_NGROUP
inline
__device__
__attribute__
((
const
))
index_int
compute_max_local_size
()
// If global is divisible by local then local can be a const
{
#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
#ifdef MIGRAPHX_LOCAL
#define MIGRAPHX_HAS_CONST_LOCAL 1
return
MIGRAPHX_NLOCAL
;
#endif
#else
// Returns the block size. When workgrop has non-uniform block, this returns size of the uniform
// block.
return
__ockl_get_enqueued_local_size
(
0
);
// NOLINT
#endif
#endif
}
struct
index
struct
index
{
{
...
@@ -126,8 +128,8 @@ struct index
...
@@ -126,8 +128,8 @@ struct index
#else
#else
__device__
index_int
max_nlocal
()
const
__device__
index_int
max_nlocal
()
const
{
{
MIGRAPHX_ASSERT
(
blockDim
.
x
>
0
);
MIGRAPHX_ASSERT
(
compute_max_local_size
()
>
0
);
return
blockDim
.
x
;
return
compute_max_local_size
()
;
}
}
#endif
#endif
...
@@ -249,7 +251,8 @@ struct index
...
@@ -249,7 +251,8 @@ struct index
#endif
#endif
inline
__device__
__attribute__
((
const
))
index
make_index
()
inline
__device__
__attribute__
((
const
))
index
make_index
()
{
{
return
index
{
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
,
threadIdx
.
x
,
blockIdx
.
x
};
// NOLINT
return
index
{
blockIdx
.
x
*
compute_max_local_size
()
+
threadIdx
.
x
,
threadIdx
.
x
,
blockIdx
.
x
};
// NOLINT
}
}
}
// namespace migraphx
}
// namespace migraphx
...
...
test/eliminate_allocation_test.cpp
View file @
ffcb68b4
...
@@ -55,7 +55,7 @@ struct allocate
...
@@ -55,7 +55,7 @@ struct allocate
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
output_shape
,
const
std
::
vector
<
migraphx
::
argument
>&
)
const
const
std
::
vector
<
migraphx
::
argument
>&
)
const
{
{
return
{
output_shape
};
return
migraphx
::
argument
{
output_shape
};
}
}
};
};
...
...
test/eliminate_concat_test.cpp
View file @
ffcb68b4
...
@@ -60,7 +60,7 @@ struct concat
...
@@ -60,7 +60,7 @@ struct concat
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
output_shape
,
const
std
::
vector
<
migraphx
::
argument
>&
)
const
const
std
::
vector
<
migraphx
::
argument
>&
)
const
{
{
return
{
output_shape
};
return
migraphx
::
argument
{
output_shape
};
}
}
};
};
...
@@ -104,7 +104,7 @@ struct allocate
...
@@ -104,7 +104,7 @@ struct allocate
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
output_shape
,
const
std
::
vector
<
migraphx
::
argument
>&
)
const
const
std
::
vector
<
migraphx
::
argument
>&
)
const
{
{
return
{
output_shape
};
return
migraphx
::
argument
{
output_shape
};
}
}
};
};
...
...
test/memory_coloring_test.cpp
View file @
ffcb68b4
...
@@ -55,7 +55,7 @@ struct allocate
...
@@ -55,7 +55,7 @@ struct allocate
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
output_shape
,
const
std
::
vector
<
migraphx
::
argument
>&
)
const
const
std
::
vector
<
migraphx
::
argument
>&
)
const
{
{
return
{
output_shape
};
return
migraphx
::
argument
{
output_shape
};
}
}
};
};
...
...
test/normalize_ops_test.cpp
View file @
ffcb68b4
...
@@ -57,7 +57,7 @@ struct normalize_test_op
...
@@ -57,7 +57,7 @@ struct normalize_test_op
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
output_shape
,
const
std
::
vector
<
migraphx
::
argument
>&
)
const
const
std
::
vector
<
migraphx
::
argument
>&
)
const
{
{
return
{
output_shape
};
return
migraphx
::
argument
{
output_shape
};
}
}
};
};
...
...
test/onnx/.onnxrt-commit
View file @
ffcb68b4
6
d7bc2a097a1a08541cd0d4628831c79ab8092d5
6
35d3faa3b3908d2806d009dc6872152cfcfcdda
test/onnx/argmax_select_last_index_test.onnx
0 → 100644
View file @
ffcb68b4
File added
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment