gaoqiong / MIGraphX / Commits

Commit 2ab23275
Authored Jun 16, 2023 by Alan Turner

    Merge remote-tracking branch 'origin/develop' into ck-integration-tuning

Parents: 2c3563dd, 013d4829

Showing 20 changed files with 442 additions and 102 deletions (+442 -102):
    cmake/Embed.cmake                                            +76  -15
    src/common.cpp                                               +17   -2
    src/driver/main.cpp                                          +21   -0
    src/driver/perf.cpp                                          +34   -0
    src/driver/perf.hpp                                           +9   -0
    src/include/migraphx/matcher.hpp                              +9   -9
    src/include/migraphx/op/broadcast.hpp                         +3   -0
    src/include/migraphx/op/multibroadcast.hpp                    +8   -3
    src/include/migraphx/op/reshape.hpp                         +117   -5
    src/include/migraphx/shape.hpp                                +1   -1
    src/onnx/onnx_parser.cpp                                     +19   -0
    src/onnx/parse_instancenorm.cpp                              +28  -18
    src/program.cpp                                              +47  -33
    src/targets/gpu/CMakeLists.txt                                +5   -6
    src/targets/gpu/hip.cpp                                       +5   -1
    src/targets/gpu/jit/ck_gemm.cpp                               +1   -1
    src/targets/gpu/kernels/include/migraphx/kernels/math.hpp     +2   -4
    src/value.cpp                                                 +3   -0
    test/api/test_gpu.cpp                                         +4   -4
    test/onnx/gen_onnx.py                                        +33   -0
cmake/Embed.cmake (+76 -15)

@@ -24,6 +24,40 @@
 find_program(EMBED_LD ld)
 find_program(EMBED_OBJCOPY objcopy)

+if(LINUX)
+    option(EMBED_USE_LD "Use ld to embed data files" ON)
+else()
+    option(EMBED_USE_LD "Use ld to embed data files" OFF)
+endif()
+
+function(wrap_string)
+    set(options)
+    set(oneValueArgs VARIABLE AT_COLUMN)
+    set(multiValueArgs)
+
+    cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    cmake_parse_arguments(WRAP_STRING "${options}" "${oneValueArgs}" "" ${ARGN})
+
+    string(LENGTH ${${PARSE_VARIABLE}} string_length)
+    math(EXPR offset "0")
+
+    while(string_length GREATER 0)
+        if(string_length GREATER ${PARSE_AT_COLUMN})
+            math(EXPR length "${PARSE_AT_COLUMN}")
+        else()
+            math(EXPR length "${string_length}")
+        endif()
+
+        string(SUBSTRING ${${PARSE_VARIABLE}} ${offset} ${length} line)
+        set(lines "${lines}\n${line}")
+
+        math(EXPR string_length "${string_length} - ${length}")
+        math(EXPR offset "${offset} + ${length}")
+    endwhile()
+
+    set(${PARSE_VARIABLE} "${lines}" PARENT_SCOPE)
+endfunction()
+
 function(generate_embed_source EMBED_NAME)
     set(options)
     set(oneValueArgs SRC HEADER)

@@ -46,14 +80,21 @@ function(generate_embed_source EMBED_NAME)
         list(GET PARSE_OBJECTS ${idx} OBJECT)
         set(START_SYMBOL "_binary_${SYMBOL}_start")
         set(END_SYMBOL "_binary_${SYMBOL}_end")
+        if(EMBED_USE_LD)
             string(APPEND EXTERNS "
                 extern const char ${START_SYMBOL}[];
                 extern const char ${END_SYMBOL}[];
             ")
+        else()
+            string(APPEND EXTERNS "
+                extern const char ${START_SYMBOL}[];
+                extern const char* ${END_SYMBOL};
+            ")
+        endif()
         # TODO: Should use NAME_WLE
         get_filename_component(BASE_NAME "${OBJECT}" NAME)
-        string(REGEX REPLACE ".[A-Za-z0-9_]$" "" BASE_NAME ${BASE_NAME})
+        string(REGEX REPLACE ".[A-Za-z0-9_]+$" "" BASE_NAME ${BASE_NAME})
         string(APPEND INIT_KERNELS "
             { \"${BASE_NAME}\", { ${START_SYMBOL}, ${END_SYMBOL}} },
         ")

@@ -86,9 +127,14 @@ function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
     string(MAKE_C_IDENTIFIER "${REL_FILE}" SYMBOL)
     get_filename_component(OUTPUT_FILE_DIR "${REL_FILE}" DIRECTORY)
     file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_FILE_DIR}")
-    set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${REL_FILE}.o")
+    if(EMBED_USE_LD)
+        set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${REL_FILE}.o")
+    else()
+        set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${REL_FILE}.cpp")
+    endif()
     set(${OUTPUT_SYMBOL} ${SYMBOL} PARENT_SCOPE)
     set(${OUTPUT_FILE} "${OUT_FILE}" PARENT_SCOPE)
+    if(EMBED_USE_LD)
         add_custom_command(
             OUTPUT "${OUT_FILE}"
             COMMAND ${EMBED_LD} -r -o "${OUT_FILE}" -z noexecstack --format=binary "${REL_FILE}"

@@ -97,6 +143,21 @@ function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
             DEPENDS ${FILE}
             VERBATIM)
+    else()
+        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${FILE})
+        # reads source file contents as hex string
+        file(READ ${FILE} HEX_STRING HEX)
+        # wraps the hex string into multiple lines
+        wrap_string(VARIABLE HEX_STRING AT_COLUMN 80)
+        # adds '0x' prefix and comma suffix before and after every byte respectively
+        string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1, " ARRAY_VALUES ${HEX_STRING})
+        # removes trailing comma
+        string(REGEX REPLACE ", $" "" ARRAY_VALUES ${ARRAY_VALUES})
+        file(WRITE "${OUT_FILE}" "
+extern const char _binary_${SYMBOL}_start[] = { ${ARRAY_VALUES} };
+extern const char* _binary_${SYMBOL}_end = _binary_${SYMBOL}_start + sizeof(_binary_${SYMBOL}_start);\n")
+    endif()
     endforeach()
 endfunction()

@@ -119,6 +180,6 @@ function(add_embed_library EMBED_NAME)
     generate_embed_source(${EMBED_NAME} SRC ${SRC_FILE} HEADER ${HEADER_FILE} OBJECTS ${OUTPUT_FILES} SYMBOLS ${SYMBOLS})
     add_library(${EMBED_NAME} STATIC ${OUTPUT_FILES} "${SRC_FILE}")
     target_include_directories(${EMBED_NAME} PUBLIC "${EMBED_DIR}/include")
-    target_compile_options(${EMBED_NAME} PRIVATE -Wno-reserved-identifier)
+    target_compile_options(${EMBED_NAME} PRIVATE -Wno-reserved-identifier -Wno-extern-initializer -Wno-missing-variable-declarations)
     set_target_properties(${EMBED_NAME} PROPERTIES POSITION_INDEPENDENT_CODE On)
 endfunction()
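For a concrete picture of the new EMBED_USE_LD=OFF path, here is roughly what the file(WRITE ...) template above generates. This is an illustrative sketch, not part of the commit; the symbol name "kernels_add_cpp" and the three-byte payload are hypothetical:

    // Illustrative sketch of a generated embed source file, assuming the
    // embedded file's MAKE_C_IDENTIFIER symbol is "kernels_add_cpp"
    // (hypothetical) and its contents are the bytes "abc" (0x61 0x62 0x63).
    extern const char _binary_kernels_add_cpp_start[] = {0x61, 0x62, 0x63};
    extern const char* _binary_kernels_add_cpp_end =
        _binary_kernels_add_cpp_start + sizeof(_binary_kernels_add_cpp_start);

Note that the non-ld path makes the `_end` symbol a pointer initialized to one past the end of the array, while the ld path's `_end` is itself an array symbol; the if(EMBED_USE_LD) branch in generate_embed_source declares the externs accordingly.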
src/common.cpp (+17 -2)

 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal

@@ -148,6 +148,18 @@ shape common_shape(const std::vector<shape>& shapes)
     return {compute_common_types(shapes), compute_common_lens(shapes)};
 }

+/**
+ * @brief Creates and adds instructions to convert input arguments to common shapes and types
+ * by adding multi-broadcast and type convert operations. This is a utility function for creating
+ * operations where the shape and type of inputs need to match. It supports both dynamic and
+ * static-shaped arguments.
+ *
+ * @param m containing module for instruction
+ * @param ins insertion location in instruction list
+ * @param inputs instructions to use as argument list; also, the shapes
+ * attached to each instruction_ref are considered for broadcasting
+ * @return std::vector<instruction_ref> a modified argument list
+ */
 std::vector<instruction_ref>
 insert_common_args(module& m, instruction_ref ins, std::vector<instruction_ref> inputs)
 {

@@ -158,7 +170,7 @@ insert_common_args(module& m, instruction_ref ins, std::vector<instruction_ref>
     if(inputs.size() != 2)
     {
         MIGRAPHX_THROW("INSERT_COMMON_OP: not handled; " + migraphx::to_string(inputs.size()) +
-                       "inputs, only handle two inputs if any are dynamic shape");
+                       " inputs. Requires exactly two inputs if any are dynamic shape");
     }
     auto c_type = compute_common_types(to_shapes(inputs));

@@ -224,6 +236,9 @@ instruction_ref insert_common_op(module& m,
     return m.insert_instruction(ins, op, insert_common_args(m, ins, std::move(inputs)));
 }

+/**
+ * Wrapper for insert_common_args() which inserts operation at the end of the module.
+ */
 instruction_ref
 add_common_op(module& m, const operation& op, std::vector<instruction_ref> inputs)
 {
     return insert_common_op(m, m.end(), op, std::move(inputs));
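A minimal usage sketch of the documented utility, using the add_common_op signature from this diff; `m`, `x`, and `y` are assumed to be an existing module and two instructions with broadcastable shapes:

    // Sketch: append an "add" whose inputs are first brought to a common
    // shape and type; multibroadcast/convert instructions are inserted
    // automatically as needed, for static or dynamic input shapes.
    auto sum = migraphx::add_common_op(m, migraphx::make_op("add"), {x, y});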
src/driver/main.cpp (+21 -0)

@@ -455,8 +455,29 @@ struct compiler
     {
         auto p = l.load();
         // Dont compile if its already been compiled
         if(p.is_compiled())
         {
+            if(ct.target_name == "gpu")
+            {
+                if(is_offload_copy_set(p) and not co.offload_copy)
+                {
+                    std::cout << "MIGraphX program was likely compiled with offload_copy set, Try "
+                                 "passing "
+                                 "`--enable-offload-copy` if program run fails.\n";
+                }
+                else if(co.offload_copy)
+                {
+                    std::cout << "MIGraphX program was likely compiled without "
+                                 "offload_copy set, Try "
+                                 "removing "
+                                 "`--enable-offload-copy` flag if passed to driver, if program run "
+                                 "fails.\n";
+                }
+            }
             return p;
         }
         auto t = ct.get_target();
         if(to_fp16)
         {
src/driver/perf.cpp (+34 -0)

@@ -24,6 +24,8 @@
 #include "perf.hpp"
 #include <migraphx/generate.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/instruction_ref.hpp>
 #include <migraphx/register_target.hpp>
 #ifdef HAVE_GPU
 #include <migraphx/gpu/hip.hpp>

@@ -97,6 +99,38 @@ target get_target(bool gpu)
     return make_target("cpu");
 }

+bool is_offload_copy_set(const program& p)
+{
+    assert(p.is_compiled());
+    const module* mm                     = p.get_main_module();
+    std::vector<std::string> param_names = mm->get_parameter_names();
+    std::unordered_set<instruction_ref> param_ins;
+    std::transform(param_names.begin(),
+                   param_names.end(),
+                   std::inserter(param_ins, param_ins.begin()),
+                   [&](const auto& i) { return mm->get_parameter(i); });
+    for(const auto& i : *mm)
+    {
+        if(i.name() == "hip::copy_to_gpu")
+        {
+            auto copy_arg = instruction::get_output_alias(i.inputs().front(), true);
+            param_ins.erase(copy_arg);
+        }
+        else if(i.name() == "@return")
+        {
+            auto return_args = i.inputs();
+            for(const auto& j : return_args)
+            {
+                auto alias_ins = instruction::get_output_alias(j, true);
+                if((alias_ins->name() == "@param" && param_ins.erase(alias_ins) == 0) or
+                   (alias_ins->name() != "hip::copy_from_gpu"))
+                    return false;
+            }
+        }
+    }
+    return param_ins.empty();
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace driver
 } // namespace migraphx
src/driver/perf.hpp (+9 -0)

@@ -39,6 +39,15 @@ parameter_map create_param_map(const program& p, const target& t, bool offload =
 parameter_map fill_param_map(parameter_map& m, const program& p, bool gpu);
 parameter_map create_param_map(const program& p, bool gpu = true);
 target get_target(bool gpu);

+/**
+ * @brief Checks if MIGraphX program compiled for "GPU" has offload_copy set or not. This is
+ * intended to print a HINT for the users and would not always correctly classify compiled
+ * program as with or without offload_copy in all cases.
+ * @param p Compiled MIGraphX program for GPU backend
+ * @return true if program is classified as compiled with "offload_copy" set
+ */
+bool is_offload_copy_set(const program& p);
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace driver
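A hedged sketch of how this heuristic could be consulted outside the driver; it assumes the usual migraphx deserialization entry point and a hypothetical model path:

    // Sketch: load a previously compiled program and apply the heuristic.
    // "model.mxr" is a hypothetical file; migraphx::load is assumed to be
    // the standard program-deserialization function.
    migraphx::program p = migraphx::load("model.mxr");
    if(p.is_compiled() and migraphx::driver::is_offload_copy_set(p))
        std::cout << "program appears to be compiled with offload_copy set\n";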
src/include/migraphx/matcher.hpp (+9 -9)

@@ -135,14 +135,14 @@ template <class M>
 auto bind_match(M m, std::string name)
 {
     return make_function_matcher(
-        [=, name = std::move(name)](matcher_context& ctx,
+        [=, m_name = std::move(name)](matcher_context& ctx,
                                     instruction_ref ins) -> optional<instruction_ref> {
             auto result = m.match(ctx, ins);
             if(result)
             {
                 if(not ctx.has_instruction(ins))
                     return nullopt;
-                ctx.instructions[name] = ins;
+                ctx.instructions[m_name] = ins;
             }
             return result;
         });

@@ -655,9 +655,9 @@ auto skip_output(Ms... ms)
 inline auto var(std::string s)
 {
     return make_basic_fun_matcher(
-        [=, s = std::move(s)](const matcher_context& ctx,
+        [=, m_s = std::move(s)](const matcher_context& ctx,
                               instruction_ref) -> optional<instruction_ref> {
-            auto it = ctx.instructions.find(s);
+            auto it = ctx.instructions.find(m_s);
             if(it == ctx.instructions.end())
                 return nullopt;
             return it->second;

@@ -667,7 +667,7 @@ inline auto var(std::string s)
 inline auto name(std::string s)
 {
     return make_basic_pred_matcher(
-        [=, s = std::move(s)](instruction_ref ins) { return ins->name() == s; });
+        [=, m_s = std::move(s)](instruction_ref ins) { return ins->name() == m_s; });
 }

 inline auto name_contains(const std::string& name)

@@ -678,8 +678,8 @@ inline auto name_contains(const std::string& name)
 inline auto name(std::unordered_set<std::string> names)
 {
-    return make_basic_pred_matcher([=, names = std::move(names)](instruction_ref ins) {
-        return names.count(ins->name()) > 0;
+    return make_basic_pred_matcher([=, m_names = std::move(names)](instruction_ref ins) {
+        return m_names.count(ins->name()) > 0;
     });
 }
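The changes here only rename the lambda captures (to an m_ prefix, presumably to silence shadowing warnings); behavior is unchanged. For context, a sketch of how these matchers are typically composed, assuming the matcher .bind() helper built on the bind_match shown above:

    // Sketch: match any instruction named "add" and bind it under "sum".
    // bind_match stores the instruction in matcher_context::instructions
    // under that key, and var("sum") can retrieve it in a later match.
    auto m = migraphx::match::name("add").bind("sum");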
src/include/migraphx/op/broadcast.hpp (+3 -0)

@@ -68,6 +68,9 @@ struct broadcast
     {
         // the ONNX broadcast op is deprecated now, so not handling the negative
         // value of axis anymore
+        if(s0.dynamic())
+            MIGRAPHX_THROW("BROADCAST: Single dynamic input shape not supported. Use two inputs.");
+
         if(axis >= broadcast_lens.size())
         {
             MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
src/include/migraphx/op/multibroadcast.hpp (+8 -3)

 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal

@@ -37,8 +37,10 @@ namespace op {
 /**
  * Broadcast multiple dimensions between two tensors.
  * Two versions of this operator: one input and two inputs.
- * One input version uses output_lens attribute and broadcasts to it.
- * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ * One input version uses output_lens attribute and broadcasts to it (does not support
+ * dynamic shape input).
+ *
+ * Two inputs version broadcasts the first input to the common shape of the two inputs.
  */
 struct multibroadcast
 {

@@ -81,6 +83,9 @@ struct multibroadcast
         if(inputs.size() == 1)
         {
+            if(s0.dynamic())
+                MIGRAPHX_THROW(
+                    "MULTIBROADCAST: Single dynamic input shape not supported. Use two inputs.");
             if(s0.lens().size() > output_lens.size())
             {
                 MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
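A short sketch of the two forms described in the updated comment, assuming a module `m` and existing instructions `x` and `y`; the out_lens values are illustrative:

    // Two-input form: broadcast x to the common shape of x and y
    // (this form also works when the shapes are dynamic).
    auto xb = m.add_instruction(migraphx::make_op("multibroadcast"), x, y);

    // One-input form: broadcast x to a fixed out_lens (static shapes only;
    // per this commit, a dynamic input now throws).
    auto xs = m.add_instruction(
        migraphx::make_op("multibroadcast", {{"out_lens", {2, 3, 4}}}), x);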
src/include/migraphx/op/reshape.hpp (+117 -5)

@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/optional.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -96,9 +97,115 @@ struct reshape
         return {s0.type(), output_dyn_dims};
     }

+    template <class Iterator>
+    static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim)
+    {
+        std::size_t x = 1;
+        auto it       = std::find_if(start, last, [&](auto i) {
+            x *= i;
+            return x >= dim;
+        });
+        if(x != dim)
+            return start;
+        return it;
+    }
+
+    template <class DimIterator, class StrideIterator>
+    static auto can_strides_merge(DimIterator dim_start,
+                                  DimIterator dim_last,
+                                  StrideIterator stride_start,
+                                  StrideIterator stride_last)
+    {
+        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
+        auto cstride = *std::prev(stride_last);
+        return std::equal(std::make_reverse_iterator(dim_last),
+                          std::make_reverse_iterator(dim_start + 1),
+                          std::make_reverse_iterator(stride_last - 1),
+                          std::make_reverse_iterator(stride_start),
+                          [&](auto dim, auto stride) {
+                              cstride *= dim;
+                              return stride == cstride;
+                          });
+    }
+
+    // This will reshape the dimesions of the input shape to use the lens of
+    // `rdims`. If this can't be done without changing memory layout then it
+    // will return nullopt
+    static optional<shape> reshape_dims(const shape& input, const std::vector<std::size_t>& rdims)
+    {
+        if(input.standard())
+            return shape{input.type(), rdims};
+
+        const auto& idims    = input.lens();
+        const auto& istrides = input.strides();
+
+        std::vector<std::size_t> rstrides;
+        std::size_t i = 0;
+        std::size_t r = 0;
+        while(i < idims.size() and r < rdims.size())
+        {
+            auto idim = idims[i];
+            auto rdim = rdims[r];
+            if(rdim == idim)
+            {
+                rstrides.push_back(istrides[i]);
+            }
+            // squeeze
+            else if(rdim > idim)
+            {
+                auto start = idims.begin() + i;
+                auto it    = compute_end_dim(start, idims.end(), rdim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((i + n) <= istrides.size());
+                if(not can_strides_merge(
+                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
+                    return nullopt;
+                i += n;
+                rstrides.push_back(istrides[i]);
+            }
+            // unsqueeze
+            else // if(rdim < idim)
+            {
+                auto start = rdims.begin() + i;
+                auto it    = compute_end_dim(start, rdims.end(), idim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((r + n) <= rdims.size());
+                auto stride = istrides[i] * idim;
+                std::for_each(start, it + 1, [&](auto dim) {
+                    stride /= dim;
+                    rstrides.push_back(stride);
+                });
+                r += n;
+            }
+            i++;
+            r++;
+        }
+
+        // Handle trailing 1s
+        if(rstrides.size() < rdims.size() and not rstrides.empty())
+        {
+            auto stride = rstrides.back();
+            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
+            {
+                if(d != 1)
+                    return nullopt;
+                rstrides.push_back(stride);
+            }
+        }
+
+        if(rdims.size() != rstrides.size())
+            return nullopt;
+
+        return shape{input.type(), rdims, rstrides};
+    }
+
     shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
     {
-        check_shapes{inputs, *this}.standard();
+        check_shapes{inputs, *this}.has(1);
         auto&& idims = inputs.front().lens();
         std::vector<std::size_t> rdims(dims.begin(), dims.end());

@@ -125,12 +232,17 @@ struct reshape
             }
         }

-        shape s{inputs.front().type(), rdims};
-        if(s.elements() != inputs.front().elements())
+        auto s = reshape_dims(inputs.front(), rdims);
+        if(not s.has_value())
+            MIGRAPHX_THROW("Reshape on axis that is not packed.");
+
+        if(s->elements() != inputs.front().elements())
             MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " +
-                           std::to_string(s.elements()) + " elements whereas the input has " +
+                           std::to_string(s->elements()) + " elements whereas the input has " +
                            std::to_string(inputs.front().elements()));
-        return s;
+
+        assert(s->bytes() == inputs.front().bytes());
+        return *s;
     }

     shape compute_shape(std::vector<shape> inputs) const
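An illustrative walkthrough of the new reshape_dims logic above (a sketch, assuming migraphx/op/reshape.hpp is included; not part of the commit). Take a non-standard shape broadcast along dim 0, lens {2, 3, 4} with strides {0, 4, 1}:

    // Reshaping to {2, 12} merges dims 3 and 4; their strides (4, 1) are
    // contiguous, so can_strides_merge succeeds and the result keeps the
    // broadcast stride: lens {2, 12}, strides {0, 1}.
    migraphx::shape s{migraphx::shape::float_type, {2, 3, 4}, {0, 4, 1}};
    auto r1 = migraphx::op::reshape::reshape_dims(s, {2, 12}); // has_value()

    // Reshaping to {6, 4} would merge dims 2 and 3, but their strides (0, 4)
    // are not contiguous, so reshape_dims returns nullopt and
    // static_compute_shape throws "Reshape on axis that is not packed."
    auto r2 = migraphx::op::reshape::reshape_dims(s, {6, 4}); // nullopt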
src/include/migraphx/shape.hpp (+1 -1)

@@ -183,7 +183,7 @@ struct shape
     const std::vector<std::size_t>& strides() const;

     /*!
-     * The number of dimensions in the shape.
+     * The number of dimensions in the shape, either static or dynamic.
      * Same as the number of indices required to get a data value.
      */
     std::size_t ndim() const;
src/onnx/onnx_parser.cpp (+19 -0)

@@ -149,6 +149,25 @@ instruction_ref onnx_parser::node_info::add_broadcastable_binary_op(const std::s
     return this->add_common_op(op_name, arg0, arg1);
 }

+/**
+ * @brief A wrapper for insert_common_args(), which constructs an argument list
+ * and inserts multibroadcast and convert ops to match inputs to a common shape and type
+ * as required. The requested operation is placed after the added multibroadcast and convert ops,
+ * if any, so that their results are transparent to the programmer.
+ *
+ * Use add_common_op() to match input sizes when inputs may be
+ * either static or dynamic.
+ *
+ * @param op_name string; Name of operation (op) to add; valid names are the same as
+ * for make_op()
+ *
+ * @param inputs vector of instruction_ref. List of instructions for the new
+ * operator. Multibroadcast and convert operations, if needed, are deduced from these too.
+ *
+ * @return instruction_ref Returns an instruction_ref which is the result of the requested
+ * operation.
+ */
 instruction_ref onnx_parser::node_info::add_common_op(const std::string& op_name,
                                                       std::vector<instruction_ref> inputs) const
 {
src/onnx/parse_instancenorm.cpp (+28 -18)

 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal

@@ -84,16 +84,17 @@ struct parse_instancenorm : op_parser<parse_instancenorm>
             MIGRAPHX_THROW(opd.op_name + ": invalid output type: " + std::to_string(dtype) +
                            ". Valid types are 1 (float), 10 (half), and 11 (double).");

-        auto ndims = dims.size();
+        bool dyn_input = x->get_shape().dynamic();
+        auto ndims     = x->get_shape().ndim();
         assert(ndims >= 2);
         auto kdims = ndims - 2;

         std::vector<int64_t> axes(kdims);
         std::iota(axes.begin(), axes.end(), 2);

         auto mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x);
-        auto mean_bcast =
-            info.add_instruction(make_op("multibroadcast", {{"out_lens", dims}}), mean);
-        auto l1 = info.add_instruction(make_op("sub"), x, mean_bcast);
+        // Use add_common_op() to insert multibroadcast/convert instructions where needed when
+        // inputs may be either static or dynamic.
+        auto l1 = info.add_common_op("sub", x, mean);
         // for the fp16, if not converting to fp32 then divide `x` and `mean` by `sqrt(n)` and take
         // reduce_sum to calculate variance i.e.
         // var = reduce_sum((x/s_n - mean/s_n)^2) where s_n = sqrt(n)

@@ -107,23 +108,32 @@ struct parse_instancenorm : op_parser<parse_instancenorm>
             });
             n = 1.0 / std::sqrt(n);
             auto n_literal = info.add_literal(literal{dtype, {n}});
             mean_bcast     = info.add_common_op("mul", {mean_bcast, n_literal});
             x              = info.add_common_op("mul", {x, n_literal});
         }
-        auto l0       = info.add_instruction(make_op("sqdiff"), x, mean_bcast);
+        auto l0       = info.add_common_op("sqdiff", x, mean);
         auto variance = info.add_instruction(make_op(reduce_op_name, {{"axes", axes}}), l0);
         auto epsilon_literal = info.add_literal(literal{shape{literal_dtype}, {epsilon}});
-        auto epsilon_bcast =
-            info.add_instruction(make_op("multibroadcast", {{"out_lens", dims}}), epsilon_literal);
-        auto variance_bcast =
-            info.add_instruction(make_op("multibroadcast", {{"out_lens", dims}}), variance);
-        auto l2 = info.add_instruction(make_op("add"), variance_bcast, epsilon_bcast);
+        auto l2 = info.add_common_op("add", variance, epsilon_literal);
         auto l3 = info.add_instruction(make_op("rsqrt"), l2);
-        auto l4 = info.add_instruction(make_op("mul"), l1, l3);
-        auto scale_bcast =
-            info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), scale);
-        auto bias_bcast =
+        auto l4 = info.add_common_op("mul", l1, l3);
+        // add_common_op() doesn't apply the plain broadcast op, so we add that op explicitly for
+        // both scale and bias.
+        instruction_ref scale_bcast;
+        instruction_ref bias_bcast;
+        if(dyn_input)
+        {
+            scale_bcast = info.add_instruction(make_op("broadcast", {{"axis", 1}}), scale, x);
+            bias_bcast  = info.add_instruction(make_op("broadcast", {{"axis", 1}}), bias, x);
+        }
+        else
+        {
+            scale_bcast =
+                info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), scale);
+            bias_bcast =
+                info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), bias);
+        }
         auto l5  = info.add_instruction(make_op("mul"), l4, scale_bcast);
         auto ret = info.add_instruction(make_op("add"), l5, bias_bcast);
         if(dtype == shape::half_type and convert_fp16)
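For clarity, the sqrt(n) trick referenced in the hunk's comment rests on this identity, where n is the number of elements reduced over and \mu is the mean:

    \mathrm{var}(x) \;=\; \frac{1}{n}\sum_{i}\left(x_i - \mu\right)^2
                    \;=\; \sum_{i}\left(\frac{x_i}{\sqrt{n}} - \frac{\mu}{\sqrt{n}}\right)^2

Scaling by 1/sqrt(n) before squaring keeps the summands small, which avoids fp16 overflow in the subsequent reduce_sum when the inputs are not converted to fp32.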
src/program.cpp (+47 -33)

@@ -539,7 +539,8 @@ std::vector<argument> program::eval(parameter_map params, execution_environment
         ins_out[x] = ss.str();
     });

     ret = generic_eval(*this,
                        ctx,
                        std::move(params),
                        with_check_context([&](auto& ins, auto f, auto&& check_context) {

@@ -551,15 +552,28 @@ std::vector<argument> program::eval(parameter_map params, execution_environment
             ctx.finish();
             double t2 = t.record<milliseconds>();
             std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;

             if(trace_level > 1 and ins->name().front() != '@' and ins->name() != "load" and
                not result.empty())
             {
+                migraphx::argument buffer;
+                try
+                {
                     target tgt = make_target(this->impl->target_name);
-                    auto buffer = tgt.copy_from(result);
+                    buffer     = tgt.copy_from(result);
+                }
+                catch(const migraphx::exception&)
+                {
+                    // instruction was run on host then no need to copy buffer from target
+                    buffer = result;
+                }
+                catch(...)
+                {
+                    MIGRAPHX_THROW("MIGraphX program execution with MIGRAPHX_TRACE_EVAL failed.\n");
+                }
                 if(trace_level == 2)
                 {
                     std::cout << "Output has " << to_string_range(classify_argument(buffer))
                               << std::endl;
                     std::cout << "Output: ";
                     preview_argument(std::cout, buffer);
src/targets/gpu/CMakeLists.txt (+5 -6)

@@ -35,12 +35,11 @@ endif()
 find_package(composable_kernel 1.0.0 COMPONENTS jit_library REQUIRED)

-set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
-#if(BUILD_DEV)
-#set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
-#else()
-#set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs")
-#endif()
+if(BUILD_DEV)
+    set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
+else()
+    set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs")
+endif()

 include(Embed)
 file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
src/targets/gpu/hip.cpp (+5 -1)

@@ -146,7 +146,11 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
     gpu_sync();
     std::vector<T> result(sz);
     assert(not is_device_ptr(result.data()));
-    assert(is_device_ptr(x));
+    if(not is_device_ptr(x))
+    {
+        MIGRAPHX_THROW("read_from_gpu() requires Src buffer to be on the GPU, Copy from gpu failed\n");
+    }
     auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
     if(status != hipSuccess)
         MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
src/targets/gpu/jit/ck_gemm.cpp (+1 -1)

@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include <fstream>
-#include <filesystem>
+#include <migraphx/filesystem.hpp>
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp (+2 -4)

@@ -188,10 +188,8 @@ MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, max, ::max)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, min, ::min)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, max, ::max)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, min, ::min)
-// Add overloads for half that calls the float version, this should use "hmax" and "hmin" once
-// perf CI docker is upgraded to rocm-5.5
-MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, max, ::fmaxf)
-MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, min, ::fminf)
+MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, max, ::__hmax)
+MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, min, ::__hmin)

 template <class T, MIGRAPHX_REQUIRES(not is_any_vec<T>())>
 constexpr auto max(const T& a, const T& b)
src/value.cpp (+3 -0)

@@ -527,6 +527,9 @@ std::size_t value_hash(const std::string& key, const T& x)
     hash_combine(h, x);
     return h;
 }

+std::size_t value_hash(const std::string& key, std::nullptr_t) { return hash_value(key); }
+
 std::size_t value_hash(const std::string& key, const std::vector<value>& x)
 {
     std::size_t h = hash_value(key);
test/api/test_gpu.cpp (+4 -4)

@@ -151,7 +151,7 @@ TEST_CASE(dynamic_batch_load_and_run_offload)
     c_options.set_offload_copy();
     p.compile(migraphx::target("gpu"), c_options);
     auto out_shapes = p.get_output_shapes();
-    CHECK(out_shapes.size() == 1);
+    EXPECT(out_shapes.size() == 1);
     EXPECT(out_shapes[0].dynamic());
     // batch size = 2

@@ -165,9 +165,9 @@ TEST_CASE(dynamic_batch_load_and_run_offload)
         migraphx::argument(migraphx::shape(migraphx_shape_float_type, {2, 3, 3, 3}), c.data()));
     auto outputs = p.eval(pp);
-    CHECK(shapes_before.size() == outputs.size());
-    CHECK(bool{outputs.front().get_shape() ==
-               migraphx::shape(migraphx_shape_float_type, {2, 1, 3, 3})});
+    EXPECT(shapes_before.size() == outputs.size());
+    EXPECT(bool{outputs.front().get_shape() ==
+                migraphx::shape(migraphx_shape_float_type, {2, 2, 2, 2})});
 }

 TEST_CASE(load_and_run_async)
test/onnx/gen_onnx.py (+33 -0)

@@ -3341,6 +3341,39 @@ def instance_norm_type_mismatch_test():
     return ([node], [x, scale, bias], [y])


+@onnx_test()
+def instance_norm_dyn_batch_test():
+    # the batch size is a dynamic dimension
+    x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [None, 2, 3, 3])
+    scale = helper.make_tensor_value_info('1', TensorProto.FLOAT, [2])
+    bias = helper.make_tensor_value_info('2', TensorProto.FLOAT, [2])
+    y = helper.make_tensor_value_info('3', TensorProto.FLOAT, [None, 2, 3, 3])
+
+    node = onnx.helper.make_node('InstanceNormalization',
+                                 inputs=['0', '1', '2'],
+                                 outputs=['3'])
+
+    return ([node], [x, scale, bias], [y])
+
+
+@onnx_test()
+def instance_norm_dyn_batch_half_test():
+    # the batch size is a dynamic dimension
+    x = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [None, 2, 3, 3])
+    scale = helper.make_tensor_value_info('1', TensorProto.FLOAT16, [2])
+    bias = helper.make_tensor_value_info('2', TensorProto.FLOAT16, [2])
+    y = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [None, 2, 3, 3])
+
+    node = onnx.helper.make_node('InstanceNormalization',
+                                 inputs=['0', '1', '2'],
+                                 outputs=['3'])
+
+    return ([node], [x, scale, bias], [y])
+
+
 @onnx_test()
 def instance_norm_invalid_type_test():
     x = helper.make_tensor_value_info('0', TensorProto.INT32, [1, 2, 3, 3])