gaoqiong / MIGraphX / Commits

Commit 57444235, authored Oct 29, 2018 by Khalique
fix merge conflict
Parents: a0ea12f6, d8bf45cf

Changes: 49 files; showing 20 changed files with 375 additions and 69 deletions (+375 −69)
CMakeLists.txt                                             +6  −0
src/CMakeLists.txt                                         +1  −0
src/common_subexpression_elimination.cpp                   +37 −0
src/include/migraph/builtin.hpp                            +15 −0
src/include/migraph/common_subexpression_elimination.hpp   +19 −0
src/include/migraph/instruction.hpp                        +9  −1
src/include/migraph/operators.hpp                          +53 −2
src/include/migraph/program.hpp                            +4  −0
src/include/migraph/ranges.hpp                             +6  −0
src/instruction.cpp                                        +12 −6
src/onnx/onnx.cpp                                          +53 −3
src/program.cpp                                            +62 −24
src/targets/cpu/cpu_lowering.cpp                           +44 −17
src/targets/gpu/CMakeLists.txt                             +2  −0
src/targets/gpu/add.cpp                                    +3  −3
src/targets/gpu/batchnorm.cpp                              +1  −1
src/targets/gpu/concat.cpp                                 +27 −0
src/targets/gpu/contiguous.cpp                             +4  −3
src/targets/gpu/convolution.cpp                            +9  −5
src/targets/gpu/device/add.cpp                             +8  −4
CMakeLists.txt
@@ -4,6 +4,12 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
    message(FATAL_ERROR "The binary and source directroy cannot be the same")
endif()

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if(NOT MSVC_IDE AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING
        "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel.")
endif()

project(migraphlib)
find_package(ROCM REQUIRED)
...
src/CMakeLists.txt
add_library(migraph
    auto_contiguous.cpp
    common_subexpression_elimination.cpp
    constant_propagate.cpp
    dead_code_elimination.cpp
    eliminate_allocation.cpp
...
src/common_subexpression_elimination.cpp (new file, mode 100644)
#include <migraph/common_subexpression_elimination.hpp>
#include <migraph/program.hpp>
#include <migraph/instruction.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/ranges.hpp>
#include <migraph/functional.hpp>
#include <unordered_set>

namespace migraph {

template <class Range>
void cse_range(program& p, Range&& r)
{
    std::unordered_multimap<std::string, instruction_ref> instructions;
    for(auto ins : r)
    {
        // Skip dead instructions
        if(ins->outputs().empty())
            continue;
        // Find instruction with the same name
        auto found_instructions = range(instructions.equal_range(ins->name()));
        for(const auto& pp : found_instructions)
        {
            auto eq = pp.second;
            if(*eq != *ins)
                continue;
            p.replace_instruction(ins, eq);
            cse_range(p, eq->outputs());
        }
        instructions.emplace(ins->name(), ins);
    }
}

void common_subexpression_elimination::apply(program& p) const { cse_range(p, iterator_for(p)); }

} // namespace migraph
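The pass buckets instructions by operator name in a multimap; when a later instruction compares equal to an earlier one (see the operator== added to instruction.cpp below), the duplicate is redirected to the survivor, and the survivor's users are re-scanned because rewriting their arguments may expose new duplicates one level up. A minimal sketch of driving the pass, assuming the program-building calls (add_parameter, add_instruction) behave as they do elsewhere in this codebase:

    // Hypothetical driver: two identical add instructions collapse into one.
    #include <migraph/program.hpp>
    #include <migraph/operators.hpp>
    #include <migraph/common_subexpression_elimination.hpp>
    #include <migraph/dead_code_elimination.hpp>

    int main()
    {
        migraph::program p;
        auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 2}});
        auto a1 = p.add_instruction(migraph::op::add{}, x, x);
        auto a2 = p.add_instruction(migraph::op::add{}, x, x); // duplicate of a1
        p.add_instruction(migraph::op::mul{}, a1, a2);
        migraph::common_subexpression_elimination{}.apply(p); // a2's users now use a1
        migraph::dead_code_elimination{}.apply(p);            // sweeps the orphaned duplicate
        return 0;
    }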
src/include/migraph/builtin.hpp
@@ -4,6 +4,7 @@
#include <migraph/context.hpp>
#include <migraph/errors.hpp>
#include <migraph/argument.hpp>
#include <migraph/reflect.hpp>

namespace migraph {
...
@@ -22,6 +23,13 @@ struct literal
struct outline
{
    shape s;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.s, "shape"));
    }

    std::string name() const { return "@outline"; }
    shape compute_shape(const std::vector<shape>&) const { return s; }
    argument compute(context&, const shape&, const std::vector<argument>&) const
...
@@ -33,6 +41,13 @@ struct outline
struct param
{
    std::string parameter;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.parameter, "parameter"));
    }

    std::string name() const { return "@param"; }
    shape compute_shape(const std::vector<shape>&) const { MIGRAPH_THROW("builtin"); }
    argument compute(context&, const shape&, const std::vector<argument>&) const
...
src/include/migraph/common_subexpression_elimination.hpp (new file, mode 100644)
#ifndef MIGRAPH_GUARD_RTGLIB_COMMON_SUBEXPRESSION_ELIMINATION_HPP
#define MIGRAPH_GUARD_RTGLIB_COMMON_SUBEXPRESSION_ELIMINATION_HPP

#include <string>
#include <migraph/instruction_ref.hpp>

namespace migraph {

struct program;

struct common_subexpression_elimination
{
    std::string name() const { return "common_subexpression_elimination"; }
    void apply(program& p) const;
};

} // namespace migraph
#endif
src/include/migraph/instruction.hpp
@@ -5,6 +5,7 @@
#include <migraph/shape.hpp>
#include <migraph/instruction_ref.hpp>
#include <migraph/operation.hpp>
#include <migraph/erase.hpp>
#include <string>
#include <utility>
...
@@ -43,6 +44,10 @@ struct instruction
    const std::vector<instruction_ref>& outputs() const;

    friend bool operator==(const instruction& x, const instruction& y);
    friend bool operator!=(const instruction& x, const instruction& y);
    friend bool operator==(instruction_ref ref, const instruction& i);
    friend bool operator!=(const instruction& i, instruction_ref ref);
...
@@ -52,7 +57,10 @@ struct instruction
    void add_output(instruction_ref ins);

    template <class T>
-   void remove_output(const T& ins);
+   void remove_output(const T& ins)
+   {
+       migraph::erase(output, ins);
+   }

    static void backreference(instruction_ref ref);
...
src/include/migraph/operators.hpp
@@ -314,6 +314,57 @@ struct contiguous
    }
};

struct concat
{
    std::size_t axis = 0;
    std::string name() const { return "concat"; }

    std::vector<std::size_t> compute_offsets(const shape& output_shape,
                                             const std::vector<argument> args) const
    {
        std::vector<std::size_t> offsets;
        std::vector<std::size_t> offset(args[0].get_shape().lens().size(), 0);
        offset[axis] = 0;
        for(const auto& arg : args)
        {
            offsets.push_back(output_shape.index(offset));
            offset[axis] += arg.get_shape().lens()[axis];
        }
        return offsets;
    }

    shape compute_shape(std::vector<shape> inputs) const
    {
        if(inputs.empty())
        {
            MIGRAPH_THROW("Number of input tensors should exceed 0");
        }
        const auto& first_shape_lens = inputs.front().lens();
        const auto& type             = inputs.front().type();
        for(std::size_t l = 0; l < first_shape_lens.size(); l++)
        {
            if(l != axis)
            {
                if(!std::all_of(inputs.begin(), inputs.end(), [&](auto s) {
                       return s.lens()[l] == first_shape_lens[l];
                   }))
                {
                    MIGRAPH_THROW("Non-axis dimensions should match");
                }
            }
        }
        std::size_t new_dim_axis = 0;
        for(const auto& input : inputs)
        {
            const auto& lens = input.lens();
            new_dim_axis += lens[axis];
        }
        std::vector<std::size_t> new_lens;
        std::copy(first_shape_lens.begin(), first_shape_lens.end(), std::back_inserter(new_lens));
        new_lens[axis] = new_dim_axis;
        return {type, new_lens};
    }
};
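compute_offsets records, for each input, the linear position in the output buffer where that input's block begins: a running multi-index advances along the concat axis by each input's extent, and shape::index maps it through the output's strides. A worked example with illustrative shapes:

    // Concatenate a{2,2,3} and b{2,3,3} on axis 1 -> output {2,5,3}, standard strides {15,3,1}.
    //   for a: offsets.push_back(output_shape.index({0,0,0})) == 0;        then offset[1] += 2
    //   for b: offsets.push_back(output_shape.index({0,2,0})) == 2*3 == 6; then offset[1] += 3
    // The backends then copy input l starting at output element offsets[l].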
struct slice
{
    std::vector<int64_t> axes;
...
@@ -531,7 +582,7 @@ struct reshape
    }
};

-struct gemm
+struct dot
{
    float alpha = 1.0;
    float beta  = 0.0;
...
@@ -542,7 +593,7 @@ struct gemm
        return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
    }
-   std::string name() const { return "gemm"; }
+   std::string name() const { return "dot"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(2).same_type();
...
src/include/migraph/program.hpp
@@ -95,6 +95,10 @@ struct program
    void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;

    void debug_print();
    void debug_print(instruction_ref ins);
    void debug_print(const std::vector<instruction_ref>& inss);

    friend std::ostream& operator<<(std::ostream& os, const program& p);
    friend bool operator==(const program& x, const program& y);
    friend bool operator!=(const program& x, const program& y) { return !(x == y); }
...
src/include/migraph/ranges.hpp
@@ -92,6 +92,12 @@ iterator_range<Iterator> range(Iterator start, Iterator last)
    return {start, last};
}

template <class Iterator>
iterator_range<Iterator> range(std::pair<Iterator, Iterator> p)
{
    return {p.first, p.second};
}

} // namespace migraph
#endif
src/instruction.cpp
@@ -94,6 +94,17 @@ const std::vector<instruction_ref>& instruction::inputs() const { return argumen
const std::vector<instruction_ref>& instruction::outputs() const { return output; }

bool operator==(const instruction& x, const instruction& y)
{
    if(not(x.result == y.result and x.op == y.op and x.arguments == y.arguments))
        return false;
    if(x.name() == "@literal")
        return x.lit == y.lit;
    return true;
}

bool operator!=(const instruction& x, const instruction& y) { return !(x == y); }

bool operator==(instruction_ref ref, const instruction& i) { return i == ref; }

bool operator!=(const instruction& i, instruction_ref ref) { return !(i == ref); }
...
@@ -106,12 +117,6 @@ void instruction::add_output(instruction_ref ins)
    output.push_back(ins);
}

-template <class T>
-void instruction::remove_output(const T& ins)
-{
-    migraph::erase(output, ins);
-}

void instruction::backreference(instruction_ref ref)
{
    for(auto&& arg : ref->inputs())
...
@@ -151,6 +156,7 @@ void instruction::replace(std::vector<instruction_ref> args)
void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
{
    assert(std::any_of(arguments.begin(), arguments.end(), [&](auto i) { return i == old; }));
    std::replace(arguments.begin(), arguments.end(), old, new_ins);
    old->remove_output(*this);
}
...
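These operators give the CSE pass its notion of "same computation": two instructions are equal when their result shape, operator (with its reflected attributes), and argument instructions all match, and literals must additionally match by value, since every literal shares the name "@literal" and has no arguments. A small illustration (the add_literal calls are assumptions based on how programs are built elsewhere, not part of this diff):

    // Hypothetical: identically-built literals compare equal, so CSE can fold them.
    migraph::program p;
    auto l1 = p.add_literal(3);
    auto l2 = p.add_literal(3);
    assert(*l1 == *l2); // same shape, op, arguments, and literal payload
    auto l3 = p.add_literal(4);
    assert(*l1 != *l3); // differs only in the payload, still unequal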
src/onnx/onnx.cpp
@@ -50,7 +50,7 @@ struct onnx_parser
        add_generic_op("Add", op::add{});
        add_generic_op("Div", op::div{});
-       add_generic_op("MatMul", op::gemm{});
+       add_generic_op("MatMul", op::dot{});
        add_generic_op("Mul", op::mul{});
        add_generic_op("Relu", op::activation{"relu"});
        add_generic_op("Sub", op::sub{});
...
@@ -67,6 +67,10 @@ struct onnx_parser
        add_mem_op("Gemm", &onnx_parser::parse_gemm);
        add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
        add_mem_op("Softmax", &onnx_parser::parse_softmax);
        add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
        add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
        add_mem_op("Slice", &onnx_parser::parse_slice);
        add_mem_op("Concat", &onnx_parser::parse_concat);
    }

    template <class F>
...
@@ -188,6 +192,52 @@ struct onnx_parser
        return prog.add_instruction(op::flatten{axis}, args[0]);
    }

    instruction_ref
    parse_squeeze(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
    {
        op::squeeze op;
        literal s = parse_value(attributes.at("axes"));
        s.visit([&](auto v) { copy(v, std::back_inserter(op.axes)); });
        return prog.add_instruction(op, args[0]);
    }

    instruction_ref
    parse_unsqueeze(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
    {
        op::unsqueeze op;
        literal s = parse_value(attributes.at("axes"));
        s.visit([&](auto v) { copy(v, std::back_inserter(op.axes)); });
        return prog.add_instruction(op, args[0]);
    }

    instruction_ref
    parse_concat(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
    {
        std::size_t axis = parse_value(attributes.at("axis")).at<int>();
        op::concat op{axis};
        return prog.add_instruction(op, std::move(args));
    }

    instruction_ref
    parse_slice(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
    {
        op::slice op;
        if(contains(attributes, "axes"))
        {
            literal s = parse_value(attributes.at("axes"));
            s.visit([&](auto v) { copy(v, std::back_inserter(op.axes)); });
        }
        {
            literal s = parse_value(attributes.at("ends"));
            s.visit([&](auto v) { copy(v, std::back_inserter(op.ends)); });
        }
        {
            literal s = parse_value(attributes.at("starts"));
            s.visit([&](auto v) { copy(v, std::back_inserter(op.starts)); });
        }
        return prog.add_instruction(op, args[0]);
    }

    instruction_ref
    parse_constant(const std::string&, attribute_map attributes, const std::vector<instruction_ref>&)
...
@@ -225,11 +275,11 @@ struct onnx_parser
        if(args.size() == 3)
        {
            uint64_t axis = 1;
-           auto l3 = prog.add_instruction(op::gemm{alpha, beta}, l1, l2);
+           auto l3 = prog.add_instruction(op::dot{alpha, beta}, l1, l2);
            auto l4 = prog.add_instruction(op::broadcast{axis, l3->get_shape()}, args[2]);
            return prog.add_instruction(op::add{}, l3, l4);
        }
-       return prog.add_instruction(op::gemm{alpha, beta}, l1, l2);
+       return prog.add_instruction(op::dot{alpha, beta}, l1, l2);
    }

    instruction_ref
...
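Note how parse_gemm lowers ONNX Gemm after the rename: the matrix product becomes op::dot, and a three-argument Gemm broadcasts the bias to the product's shape before an explicit add rather than folding it into the multiply. Roughly, for illustrative shapes:

    // Gemm(A[2,3], B[3,4], C[4]) lowers to:
    //   l3  = dot{alpha, beta}(A, B)          -> shape {2, 4}
    //   l4  = broadcast{axis=1, l3.shape}(C)  -> C repeated across rows
    //   out = add(l3, l4)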
src/program.cpp
@@ -23,6 +23,36 @@ struct program_impl
const operation& get_operation(instruction_ref ins) { return ins->get_operator(); }

static void print_instruction(std::ostream& os,
                              instruction_ref ins,
                              const std::unordered_map<instruction_ref, std::string>& names)
{
    os << names.at(ins) << " = ";
    os << ins->get_operator();
    if(ins->name() == "@literal")
    {
        if(ins->get_literal().get_shape().elements() > 10)
            os << "{ ... }";
        else
            os << "{" << ins->get_literal() << "}";
    }
    if(!ins->inputs().empty())
    {
        char delim = '(';
        for(auto&& arg : ins->inputs())
        {
            os << delim << names.at(arg);
            delim = ',';
        }
        os << ")";
    }
    os << " -> " << ins->get_shape();
}

template <class F>
static void print_program(std::ostream& os, const program& p, F annonate)
{
...
@@ -36,38 +66,21 @@ static void print_program(std::ostream& os, const program& p, F annonate)
        {
            var_name = any_cast<builtin::param>(ins->get_operator()).parameter;
        }
+       names.emplace(ins, var_name);
-       os << var_name << " = ";
-       os << ins->get_operator();
-       if(ins->name() == "@literal")
-       {
-           if(ins->get_literal().get_shape().elements() > 10)
-               os << "{ ... }";
-           else
-               os << "{" << ins->get_literal() << "}";
-       }
-       if(!ins->inputs().empty())
+       // TODO: Use all_of
+       for(auto&& arg : ins->inputs())
        {
-           char delim = '(';
-           for(auto&& arg : ins->inputs())
-           {
-               assert(p.has_instruction(arg) && "Instruction not found");
-               os << delim << names.at(arg);
-               delim = ',';
-           }
-           os << ")";
+           assert(p.has_instruction(arg) && "Instruction not found");
+           (void)arg;
        }
-       os << " -> " << ins->get_shape();
+       print_instruction(os, ins, names);
        annonate(ins, names);
        os << std::endl;
-       names.emplace(ins, var_name);
        count++;
    }
}
...
@@ -124,7 +137,9 @@ instruction_ref program::replace_instruction(instruction_ref ins, instruction_re
    {
        return rep;
    }
-   for(auto&& out : ins->outputs())
+   // Make a copy of outputs which can be changed when calling replace_argument
+   auto outputs = ins->outputs();
+   for(auto out : outputs)
    {
        // TODO: Check for possible cycles
        if(out != rep)
...
@@ -135,6 +150,10 @@ instruction_ref program::replace_instruction(instruction_ref ins, instruction_re
    }
    // Replacement should not be dead code unless its the last instruction
    assert(!rep->outputs().empty() or rep == std::prev(end()));
+   // Output of the original instruction should only be the replacement or empty
+   assert(ins->outputs().empty() or
+          std::all_of(ins->outputs().begin(), ins->outputs().end(), [&](auto i) { return i == rep; }));
    assert(ins->valid(begin()));
    assert(rep->valid(begin()));
    return rep;
...
@@ -449,6 +468,25 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
       << ", " << std::round(calculate_overhead_percent) << "%" << std::endl;
}

void program::debug_print() { std::cout << *this << std::endl; }

void program::debug_print(instruction_ref ins)
{
    std::stringstream ss;
    print_program(ss, *this, [&](auto x, auto&& names) {
        if(x == ins)
        {
            print_instruction(std::cout, x, names);
            std::cout << std::endl;
        }
    });
}

void program::debug_print(const std::vector<instruction_ref>& inss)
{
    for(auto ins : inss)
        debug_print(ins);
    std::cout << std::endl;
}

bool operator==(const program& x, const program& y) { return to_string(x) == to_string(y); }

std::ostream& operator<<(std::ostream& os, const program& p)
...
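The debug_print family is a debugging aid: it re-prints the whole program into a throwaway stream purely to rebuild the instruction-name table, then echoes only the matching instruction(s) to stdout with their stable printed names. Hypothetical usage from inside a pass or a debugger session:

    // p.debug_print();              // whole program
    // p.debug_print(ins);           // one instruction, with its printed name
    // p.debug_print(ins->inputs()); // each input instruction, one per line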
src/targets/cpu/cpu_lowering.cpp
@@ -282,10 +282,38 @@ struct cpu_contiguous
    }
};

struct cpu_concat
{
    op::concat op;
    std::string name() const { return "cpu::concat"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        std::vector<std::size_t> coffsets = op.compute_offsets(output_shape, args);
        for(std::size_t l = 0; l < args.size(); l++)
        {
            auto argl             = args[l];
            std::size_t nelements = argl.get_shape().elements();
            visit_all(result, argl)([&](auto output, auto input) {
                auto slice_shape =
                    shape{output_shape.type(), input.get_shape().lens(), output_shape.strides()};
                auto slice = make_view(slice_shape, output.data() + coffsets[l]);
                // cppcheck-suppress useStlAlgorithm
                for(std::size_t i = 0; i < nelements; i++)
                {
                    slice[i] = input[i];
                }
            });
        }
        return result;
    }
};
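cpu_concat copies each input through a view whose lens come from the input but whose strides come from the output; iterating that view linearly therefore scatters the input's elements into the correct, generally non-contiguous, region of the output starting at coffsets[l]. A toy model of the index arithmetic in plain C++ (this codebase's make_view presumably does the equivalent internally):

    #include <cstddef>
    #include <vector>

    // Linear offset of input element i, mapping the input's multi-index
    // (from its lens) through the output's strides, innermost dimension first.
    std::size_t scatter_offset(std::size_t i,
                               const std::vector<std::size_t>& lens,
                               const std::vector<std::size_t>& strides)
    {
        std::size_t off = 0;
        for(std::size_t d = lens.size(); d > 0; d--)
        {
            off += (i % lens[d - 1]) * strides[d - 1]; // index along dimension d-1
            i /= lens[d - 1];
        }
        return off;
    }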
struct cpu_gemm
{
-   op::gemm op;
-   std::string name() const { return "cpu::gemm"; }
+   op::dot op;
+   std::string name() const { return "cpu::dot"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
...
@@ -564,25 +592,24 @@ struct cpu_apply
    {
        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
-       apply_map["gemm"]        = extend_op<cpu_gemm, op::gemm>();
+       apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
        apply_map["batch_norm_inference"] =
            extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
        apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
+       apply_map["concat"]     = extend_op<cpu_concat, op::concat>();
        apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
        apply_map["identity"]   = simple_op<cpu_unary<identity_op>>();
        apply_map["tanh"]       = simple_op<cpu_unary<tanh_op>>();
        apply_map["sigmoid"]    = simple_op<cpu_unary<sigmoid_op>>();
        apply_map["exp"]        = simple_op<cpu_unary<exp_op>>();
        apply_map["neg"]        = simple_op<cpu_unary<neg_op>>();
        apply_map["sin"]        = simple_op<cpu_unary<sin_op>>();
        apply_map["cos"]        = simple_op<cpu_unary<cos_op>>();
        apply_map["tan"]        = simple_op<cpu_unary<tan_op>>();
        apply_map["add"]        = simple_op<cpu_binary<add_op>>();
        apply_map["sub"]        = simple_op<cpu_binary<sub_op>>();
        apply_map["mul"]        = simple_op<cpu_binary<mul_op>>();
        // apply_map["scalar"] = simple_op<cpu_binary<mul_op>>();
        apply_map["div"]        = simple_op<cpu_binary<div_op>>();
        apply_map["softmax"]    = simple_op<softmax2d>();
    }
...
src/targets/gpu/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(migraph_device
    device/add_relu.cpp
    device/contiguous.cpp
    device/mul.cpp
    device/concat.cpp
)
rocm_clang_tidy_check(migraph_device)
target_link_libraries(migraph_device migraph hip::device)
...
@@ -32,6 +33,7 @@ add_library(migraph_gpu
    convolution.cpp
    softmax.cpp
    contiguous.cpp
    concat.cpp
    relu.cpp
    leaky_relu.cpp
    add.cpp
...
src/targets/gpu/add.cpp
@@ -14,9 +14,9 @@ shape hip_add::compute_shape(const std::vector<shape>& inputs) const
    return inputs.at(0);
}

-argument hip_add::compute(context&, const shape&, const std::vector<argument>& args) const
+argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
-   device::add(args[2], args[0], args[1]);
+   device::add(ctx.get_stream().get(), args[2], args[0], args[1]);
    return args[2];
}
...
@@ -34,7 +34,7 @@ argument miopen_add::compute(context& ctx,
    auto a_desc = make_tensor(args[0].get_shape());
    auto b_desc = make_tensor(args[1].get_shape());
    auto c_desc = make_tensor(output_shape);
-   miopenOpTensor(ctx.handle.get(),
+   miopenOpTensor(ctx.get_stream().get_miopen(),
                   miopenTensorOpAdd,
                   &alpha,
                   a_desc.get(),
...
src/targets/gpu/batchnorm.cpp
@@ -23,7 +23,7 @@ argument miopen_batch_norm_inference::compute(context& ctx,
    float alpha = 1.0, beta = 0.0f;

-   miopenBatchNormalizationForwardInference(ctx.handle.get(),
+   miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
                                             miopenBatchNormMode_t(op.bn_mode),
                                             &alpha,
                                             &beta,
...
src/targets/gpu/concat.cpp (new file, mode 100644)
#include <migraph/gpu/concat.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <utility>

namespace migraph {
namespace gpu {

shape hip_concat::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument hip_concat::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    std::vector<std::size_t> offsets = op.compute_offsets(output_shape, args);
    return device::concat(ctx.get_stream().get(), output_shape, args, offsets);
}

} // namespace gpu
} // namespace migraph
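hip_concat::compute_shape pops the last input before delegating to op::concat: on the GPU target a preallocated output buffer is appended as the trailing argument (the same convention hip_add follows when it writes into args[2]), so it must be excluded from the concat shape check. This convention is inferred from the other GPU ops in this diff, not stated in the commit:

    // Assumed GPU argument convention:
    //   args = { input_0, ..., input_{n-1}, output_buffer }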
src/targets/gpu/contiguous.cpp
@@ -12,13 +12,14 @@ shape miopen_contiguous::compute_shape(const std::vector<shape>& inputs) const
    check_shapes{inputs, *this}.has(2);
    return op.compute_shape({inputs.at(0)});
}

-argument miopen_contiguous::compute(context&, shape output_shape, const std::vector<argument>& args) const
+argument miopen_contiguous::compute(context& ctx, shape output_shape, const std::vector<argument>& args) const
{
    assert(output_shape == args[1].get_shape());
    assert(output_shape.standard());
    (void)output_shape;
-   device::contiguous(args.at(1), args.at(0));
+   device::contiguous(ctx.get_stream().get(), args.at(1), args.at(0));
    return args.at(1);
}
...
src/targets/gpu/convolution.cpp
@@ -21,7 +21,7 @@ argument miopen_convolution::compute(context& ctx,
    auto y_desc = make_tensor(output_shape);
    float alpha = 1, beta = 0;
-   miopenConvolutionForward(ctx.handle.get(),
+   miopenConvolutionForward(ctx.get_stream().get_miopen(),
                             &alpha,
                             x_desc.get(),
                             args[0].implicit(),
...
@@ -47,18 +47,22 @@ shape miopen_convolution::compile(context& ctx,
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
-   miopenConvolutionForwardGetWorkSpaceSize(ctx.handle.get(),
+   miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
    auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
-   auto y = to_gpu(generate_argument(output_shape));
+   auto y = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
-   miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
+   miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                          x_desc.get(),
                                          x.implicit(),
                                          w_desc.get(),
...
src/targets/gpu/device/add.cpp
@@ -5,14 +5,18 @@ namespace migraph {
namespace gpu {
namespace device {

-void add(const argument& result, const argument& arg1, const argument& arg2)
+void add(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
-   nary(result, arg1, arg2)([](auto x, auto y) { return x + y; });
+   nary(stream, result, arg1, arg2)([](auto x, auto y) { return x + y; });
}

-void add(const argument& result, const argument& arg1, const argument& arg2, const argument& arg3)
+void add(hipStream_t stream,
+        const argument& result,
+        const argument& arg1,
+        const argument& arg2,
+        const argument& arg3)
{
-   nary(result, arg1, arg2, arg3)([](auto x, auto y, auto z) { return x + y + z; });
+   nary(stream, result, arg1, arg2, arg3)([](auto x, auto y, auto z) { return x + y + z; });
}

} // namespace device
...
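The recurring change across the GPU files is threading an explicit stream through every launch: HIP kernels now receive ctx.get_stream().get() and MIOpen calls receive ctx.get_stream().get_miopen() instead of going through a shared ctx.handle. A minimal sketch of the pattern with stand-in types (the real context/stream classes are not shown in this diff):

    #include <hip/hip_runtime.h>

    // Stand-ins for the assumed migraph::gpu context/stream pair.
    struct stream_handle
    {
        hipStream_t s = nullptr;
        hipStream_t get() const { return s; } // raw stream for HIP kernel launches
        // the real class presumably also exposes get_miopen(), returning a
        // MIOpen handle bound to this stream
    };

    struct context
    {
        stream_handle stream;
        const stream_handle& get_stream() const { return stream; }
    };

    // Device code then launches on the program's stream rather than the default:
    //   hipLaunchKernelGGL(kernel, grid, block, 0, ctx.get_stream().get(), ...);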