Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
3a474fca
Commit
3a474fca
authored
Oct 04, 2021
by
Khalique Ahmed
Browse files
Merge branch 'develop' of
https://github.com/ROCmSoftwarePlatform/AMDMIGraphX
into mi100_opts
parents
d9568511
0b7672d7
Changes
44
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
627 additions
and
16 deletions
+627
-16
.clang-tidy
.clang-tidy
+1
-1
CMakeLists.txt
CMakeLists.txt
+1
-1
Jenkinsfile
Jenkinsfile
+2
-1
src/CMakeLists.txt
src/CMakeLists.txt
+1
-0
src/include/migraphx/op/multinomial.hpp
src/include/migraphx/op/multinomial.hpp
+64
-0
src/onnx/parse_depthtospace.cpp
src/onnx/parse_depthtospace.cpp
+74
-0
src/onnx/parse_multinomial.cpp
src/onnx/parse_multinomial.cpp
+65
-0
src/onnx/parse_randomnormal_ops.cpp
src/onnx/parse_randomnormal_ops.cpp
+89
-0
src/onnx/parse_randomuniform_ops.cpp
src/onnx/parse_randomuniform_ops.cpp
+89
-0
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+3
-0
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+15
-9
src/targets/gpu/device/layernorm.cpp
src/targets/gpu/device/layernorm.cpp
+15
-4
src/targets/gpu/device/multinomial.cpp
src/targets/gpu/device/multinomial.cpp
+66
-0
src/targets/gpu/include/migraphx/gpu/device/multinomial.hpp
src/targets/gpu/include/migraphx/gpu/device/multinomial.hpp
+23
-0
src/targets/gpu/include/migraphx/gpu/multinomial.hpp
src/targets/gpu/include/migraphx/gpu/multinomial.hpp
+36
-0
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+1
-0
src/targets/gpu/multinomial.cpp
src/targets/gpu/multinomial.cpp
+27
-0
test/onnx/depthtospace_crd_test.onnx
test/onnx/depthtospace_crd_test.onnx
+19
-0
test/onnx/depthtospace_simple_test.onnx
test/onnx/depthtospace_simple_test.onnx
+17
-0
test/onnx/depthtospace_test.onnx
test/onnx/depthtospace_test.onnx
+19
-0
No files found.
.clang-tidy
View file @
3a474fca
...
@@ -4,7 +4,7 @@ CheckOptions:
...
@@ -4,7 +4,7 @@ CheckOptions:
- key: bugprone-unused-return-value.CheckedFunctions
- key: bugprone-unused-return-value.CheckedFunctions
value: '::std::async;::std::launder;::std::remove;::std::remove_if;::std::unique;::std::unique_ptr::release;::std::basic_string::empty;::std::vector::empty;::std::find;::std::find_if;::std::find_if_not;::std::all_of;::std::any_of;::std::none_of;::std::count;::std::count_if;::std::mismatch;::std::find_end;::std::find_first_of;::std::adjacent_find;::std::search;::std::search_n;::std::nth_element;::std::lower_bound;::std::upper_bound;::std::binary_search;::std::equal_range;::std::max;::std::max_element;::std::min;::std::min_element;::std::minmax;::std::minmax_element;::std::equal;::std::lexicographical_compare;::std::accumulate;::std::inner_product'
value: '::std::async;::std::launder;::std::remove;::std::remove_if;::std::unique;::std::unique_ptr::release;::std::basic_string::empty;::std::vector::empty;::std::find;::std::find_if;::std::find_if_not;::std::all_of;::std::any_of;::std::none_of;::std::count;::std::count_if;::std::mismatch;::std::find_end;::std::find_first_of;::std::adjacent_find;::std::search;::std::search_n;::std::nth_element;::std::lower_bound;::std::upper_bound;::std::binary_search;::std::equal_range;::std::max;::std::max_element;::std::min;::std::min_element;::std::minmax;::std::minmax_element;::std::equal;::std::lexicographical_compare;::std::accumulate;::std::inner_product'
- key: cppcoreguidelines-macro-usage.AllowedRegexp
- key: cppcoreguidelines-macro-usage.AllowedRegexp
value: 'DEBUG|FALLTHROUGH|STRINGIZE|_HAS_|_THROW|_REQUIRES|_DECLARE_|_VISIT_|_REGISTER_|_GENERATE_|_DETAIL_|_TIDY_|_MANAGE_PTR|_MATCHER|DEVICE_SHARED'
value: 'DEBUG|FALLTHROUGH|STRINGIZE|_HAS_|_THROW|_REQUIRES|_DECLARE_|_VISIT_|_REGISTER_|_GENERATE_|_DETAIL_|_TIDY_|_MANAGE_PTR|_MATCHER|DEVICE_SHARED
|_WORKAROUND_
'
- key: modernize-loop-convert.MinConfidence
- key: modernize-loop-convert.MinConfidence
value: risky
value: risky
- key: modernize-loop-convert.NamingStyle
- key: modernize-loop-convert.NamingStyle
...
...
CMakeLists.txt
View file @
3a474fca
...
@@ -223,7 +223,7 @@ rocm_create_package(
...
@@ -223,7 +223,7 @@ rocm_create_package(
MAINTAINER
"Paul Fultz II <paul.fultz@amd.com>"
MAINTAINER
"Paul Fultz II <paul.fultz@amd.com>"
LDCONFIG
LDCONFIG
PTH
PTH
DEPENDS miopen-hip rocblas hip-
hcc
half
DEPENDS miopen-hip rocblas hip-
rocclr hip-base
half
)
)
set
(
CMAKE_LIBRARY_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_LIBRARY_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
...
...
Jenkinsfile
View file @
3a474fca
...
@@ -124,8 +124,9 @@ def onnxnode(name, body) {
...
@@ -124,8 +124,9 @@ def onnxnode(name, body) {
rocmtest
onnx:
onnxnode
(
'rocmtest'
)
{
cmake_build
->
rocmtest
onnx:
onnxnode
(
'rocmtest'
)
{
cmake_build
->
stage
(
"Onnx runtime"
)
{
stage
(
"Onnx runtime"
)
{
sh
'''
sh
'''
apt install half
ls -lR
ls -lR
dpkg -i
--force-depends
./build/*.deb
dpkg -i ./build/*.deb
cd /onnxruntime && ./build_and_test_onnxrt.sh
cd /onnxruntime && ./build_and_test_onnxrt.sh
'''
'''
}
}
...
...
src/CMakeLists.txt
View file @
3a474fca
...
@@ -129,6 +129,7 @@ register_migraphx_ops(
...
@@ -129,6 +129,7 @@ register_migraphx_ops(
min
min
mul
mul
multibroadcast
multibroadcast
multinomial
neg
neg
outline
outline
pad
pad
...
...
src/include/migraphx/op/multinomial.hpp
0 → 100644
View file @
3a474fca
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/par_for.hpp>
#include <random>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
struct
multinomial
{
shape
::
type_t
dtype
=
shape
::
type_t
::
int32_type
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
dtype
,
"dtype"
));
}
std
::
string
name
()
const
{
return
"multinomial"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
only_dims
(
2
);
size_t
sample_size
=
inputs
.
back
().
lens
().
back
();
if
(
not
contains
({
shape
::
int32_type
,
shape
::
int64_type
},
dtype
))
MIGRAPHX_THROW
(
"Multinomial: Invalid output type. Valid types are int32_type and int64_type."
);
return
{
dtype
,
{
inputs
.
front
().
lens
().
front
(),
sample_size
}};
}
argument
compute
(
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
size_t
batch_size
=
output_shape
.
lens
().
front
();
size_t
class_size
=
args
[
0
].
get_shape
().
lens
().
back
();
size_t
sample_size
=
output_shape
.
lens
().
back
();
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
cdf
,
auto
dist
)
{
result
.
visit
([
&
](
auto
output
)
{
par_for
(
batch_size
*
sample_size
,
[
&
](
auto
i
)
{
auto
idx
=
args
[
1
].
get_shape
().
multi
(
i
);
auto
cdf_begin
=
cdf
.
begin
()
+
(
idx
[
0
]
*
class_size
);
auto
cdf_end
=
cdf_begin
+
class_size
;
auto
sample_iter
=
std
::
upper_bound
(
cdf_begin
,
cdf_end
,
dist
[
i
]
*
*
(
std
::
prev
(
cdf_end
)));
output
[
i
]
=
std
::
distance
(
cdf_begin
,
sample_iter
);
});
});
});
return
result
;
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/onnx/parse_depthtospace.cpp
0 → 100644
View file @
3a474fca
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
struct
parse_depthtospace
:
op_parser
<
parse_depthtospace
>
{
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"DepthToSpace"
}};
}
instruction_ref
parse
(
const
op_desc
&
/*opd*/
,
const
onnx_parser
&
/*parser*/
,
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
{
auto
s
=
args
[
0
]
->
get_shape
();
// mode attribute of DepthToSpace
auto
mode
=
std
::
string
(
"DCR"
);
if
(
contains
(
info
.
attributes
,
"mode"
))
{
mode
=
info
.
attributes
.
at
(
"mode"
).
s
();
// DCR or CRD?
}
// blocksize attribute of DepthToSpace
int
blocksize
=
0
;
if
(
contains
(
info
.
attributes
,
"blocksize"
))
{
blocksize
=
info
.
attributes
.
at
(
"blocksize"
).
i
();
}
if
(
blocksize
<
1
)
{
MIGRAPHX_THROW
(
"DepthToSpace: blocksize is less than 1"
);
}
// calculate dimensions
auto
lens1
=
s
.
lens
();
auto
lens2
=
s
.
lens
();
unsigned
long
divisor
=
std
::
pow
(
blocksize
,
2
);
if
((
lens2
[
1
]
%
divisor
)
==
0
)
lens2
[
1
]
=
lens2
[
1
]
/
divisor
;
else
MIGRAPHX_THROW
(
"DepthToSpace: div by blocksize quotient not int "
);
lens1
.
push_back
(
lens1
[
2
]);
lens1
.
push_back
(
lens1
[
3
]);
lens2
[
2
]
=
lens2
[
2
]
*
blocksize
;
lens2
[
3
]
=
lens2
[
3
]
*
blocksize
;
lens1
[
2
]
=
blocksize
;
std
::
vector
<
int64_t
>
perm
;
if
(
mode
==
"DCR"
)
{
lens1
[
3
]
=
lens1
[
1
]
/
divisor
;
lens1
[
1
]
=
blocksize
;
perm
=
{
0
,
3
,
4
,
1
,
5
,
2
};
}
else
if
(
mode
==
"CRD"
)
{
lens1
[
1
]
=
lens1
[
1
]
/
divisor
;
lens1
[
3
]
=
blocksize
;
perm
=
{
0
,
1
,
4
,
2
,
5
,
3
};
}
else
MIGRAPHX_THROW
(
"DepthToSpace: mode attribute cannot be read."
);
auto
temp1
=
info
.
add_instruction
(
make_op
(
"reshape"
,
{{
"dims"
,
lens1
}}),
args
[
0
]);
auto
temp2
=
info
.
add_instruction
(
make_op
(
"transpose"
,
{{
"permutation"
,
perm
}}),
temp1
);
return
info
.
add_instruction
(
make_op
(
"reshape"
,
{{
"dims"
,
lens2
}}),
info
.
make_contiguous
(
temp2
));
}
};
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/onnx/parse_multinomial.cpp
0 → 100644
View file @
3a474fca
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <random>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
struct
parse_multinomial
:
op_parser
<
parse_multinomial
>
{
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"Multinomial"
}};
}
instruction_ref
parse
(
const
op_desc
&
/*opd*/
,
const
onnx_parser
&
/*parser*/
,
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
{
int
dtype
=
6
;
if
(
contains
(
info
.
attributes
,
"dtype"
))
dtype
=
info
.
attributes
.
at
(
"dtype"
).
i
();
shape
::
type_t
output_type
=
get_type
(
dtype
);
size_t
sample_size
=
1
;
if
(
contains
(
info
.
attributes
,
"sample_size"
))
sample_size
=
info
.
attributes
.
at
(
"sample_size"
).
i
();
float
seed
=
static_cast
<
float
>
(
std
::
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
().
count
());
if
(
contains
(
info
.
attributes
,
"seed"
))
seed
=
info
.
attributes
.
at
(
"seed"
).
f
();
// Subtract the per-batch maximum log-probability, making the per-batch max 0
auto
maxes
=
info
.
add_instruction
(
migraphx
::
make_op
(
"reduce_max"
,
{{
"axes"
,
{
1
}}}),
args
[
0
]);
auto
mb_maxes
=
info
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
args
[
0
]
->
get_shape
().
lens
()}}),
maxes
);
auto
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"sub"
),
args
[
0
],
mb_maxes
);
// Take the element-wise exponent to get probabilities in the range (0, 1]
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"exp"
),
cdf
);
// Compute the cumulative density function
cdf
=
info
.
add_instruction
(
migraphx
::
make_op
(
"prefix_scan_sum"
,
{{
"axis"
,
1
},
{
"exclusive"
,
false
}}),
cdf
);
// Pre-compute random distribution
std
::
mt19937
gen
(
seed
);
std
::
uniform_real_distribution
<>
dis
(
0.0
,
1.0
);
size_t
batch_size
=
args
[
0
]
->
get_shape
().
lens
().
front
();
migraphx
::
shape
dist_shape
{
migraphx
::
shape
::
float_type
,
{
batch_size
,
sample_size
}};
std
::
vector
<
float
>
random_dist
(
batch_size
*
sample_size
);
std
::
generate
(
random_dist
.
begin
(),
random_dist
.
end
(),
[
&
]()
{
return
dis
(
gen
);
});
auto
dist_lit
=
info
.
add_literal
(
migraphx
::
literal
{
dist_shape
,
random_dist
});
return
info
.
add_instruction
(
migraphx
::
make_op
(
"multinomial"
,
{{
"dtype"
,
output_type
}}),
cdf
,
dist_lit
);
}
};
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/onnx/parse_randomnormal_ops.cpp
0 → 100644
View file @
3a474fca
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <random>
#include <set>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
struct
parse_randomnormal_ops
:
op_parser
<
parse_randomnormal_ops
>
{
const
std
::
set
<
shape
::
type_t
>
valid_types
=
{
shape
::
float_type
,
shape
::
half_type
,
shape
::
double_type
};
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"RandomNormal"
},
{
"RandomNormalLike"
}};
}
instruction_ref
parse
(
const
op_desc
&
opd
,
const
onnx_parser
&
parser
,
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
{
int
dtype
=
1
;
bool
use_dtype
=
false
;
if
(
contains
(
info
.
attributes
,
"dtype"
))
{
dtype
=
info
.
attributes
.
at
(
"dtype"
).
i
();
use_dtype
=
true
;
}
shape
::
type_t
out_type
=
get_type
(
dtype
);
if
(
not
contains
(
valid_types
,
out_type
))
MIGRAPHX_THROW
(
opd
.
op_name
+
": invalid output type: "
+
std
::
to_string
(
dtype
)
+
". Valid types are 1 (float), 10 (half), and 11 (double)."
);
float
mean
=
0.0
;
if
(
contains
(
info
.
attributes
,
"mean"
))
mean
=
info
.
attributes
.
at
(
"mean"
).
f
();
float
scale
=
1.0
;
if
(
contains
(
info
.
attributes
,
"scale"
))
scale
=
info
.
attributes
.
at
(
"scale"
).
f
();
float
seed
=
static_cast
<
float
>
(
std
::
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
().
count
());
if
(
contains
(
info
.
attributes
,
"seed"
))
seed
=
info
.
attributes
.
at
(
"seed"
).
f
();
shape
out_shape
;
if
(
contains
(
info
.
attributes
,
"shape"
))
{
// RandomNormal:
// output type and shape must come from attributes
std
::
vector
<
int
>
out_lens
;
literal
ls
=
parser
.
parse_value
(
info
.
attributes
.
at
(
"shape"
));
ls
.
visit
([
&
](
auto
s
)
{
out_lens
.
assign
(
s
.
begin
(),
s
.
end
());
});
out_shape
=
shape
{
out_type
,
out_lens
};
}
else
if
(
args
.
size
()
==
1
)
{
// RandomNormalLike:
// output type and shape are the same as the input's by default
// dtype is used instead when attribute is set
if
(
not
contains
(
valid_types
,
args
[
0
]
->
get_shape
().
type
()))
MIGRAPHX_THROW
(
opd
.
op_name
+
": invalid output type: "
+
std
::
to_string
(
args
[
0
]
->
get_shape
().
type
())
+
". Valid types are float, half, and double."
);
out_shape
=
use_dtype
?
shape
{
out_type
,
args
[
0
]
->
get_shape
().
lens
()}
:
args
[
0
]
->
get_shape
();
}
else
{
MIGRAPHX_THROW
(
opd
.
op_name
+
": cannot deduce shape without shape attribute or argument."
);
}
std
::
mt19937
gen
(
seed
);
std
::
normal_distribution
<>
d
(
mean
,
scale
);
std
::
vector
<
double
>
rand_vals
(
out_shape
.
elements
());
std
::
generate
(
rand_vals
.
begin
(),
rand_vals
.
end
(),
[
&
]()
{
return
d
(
gen
);
});
return
info
.
add_literal
(
literal
{
out_shape
,
rand_vals
});
}
};
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/onnx/parse_randomuniform_ops.cpp
0 → 100644
View file @
3a474fca
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <random>
#include <set>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
struct
parse_randomuniform_ops
:
op_parser
<
parse_randomuniform_ops
>
{
const
std
::
set
<
shape
::
type_t
>
valid_types
=
{
shape
::
float_type
,
shape
::
half_type
,
shape
::
double_type
};
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"RandomUniform"
},
{
"RandomUniformLike"
}};
}
instruction_ref
parse
(
const
op_desc
&
opd
,
const
onnx_parser
&
parser
,
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
{
int
dtype
=
1
;
bool
use_dtype
=
false
;
if
(
contains
(
info
.
attributes
,
"dtype"
))
{
dtype
=
info
.
attributes
.
at
(
"dtype"
).
i
();
use_dtype
=
true
;
}
shape
::
type_t
out_type
=
get_type
(
dtype
);
if
(
not
contains
(
valid_types
,
out_type
))
MIGRAPHX_THROW
(
opd
.
op_name
+
": invalid output type: "
+
std
::
to_string
(
dtype
)
+
". Valid types are 1 (float), 10 (half), and 11 (double)."
);
float
high
=
1.0
;
if
(
contains
(
info
.
attributes
,
"high"
))
high
=
info
.
attributes
.
at
(
"high"
).
f
();
float
low
=
0.0
;
if
(
contains
(
info
.
attributes
,
"low"
))
low
=
info
.
attributes
.
at
(
"low"
).
f
();
float
seed
=
static_cast
<
float
>
(
std
::
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
().
count
());
if
(
contains
(
info
.
attributes
,
"seed"
))
seed
=
info
.
attributes
.
at
(
"seed"
).
f
();
shape
out_shape
;
if
(
contains
(
info
.
attributes
,
"shape"
))
{
// RandomUniform:
// output type and shape must come from attributes
std
::
vector
<
int
>
out_lens
;
literal
ls
=
parser
.
parse_value
(
info
.
attributes
.
at
(
"shape"
));
ls
.
visit
([
&
](
auto
s
)
{
out_lens
.
assign
(
s
.
begin
(),
s
.
end
());
});
out_shape
=
shape
{
out_type
,
out_lens
};
}
else
if
(
args
.
size
()
==
1
)
{
// RandomUniformLike:
// output type and shape are the same as the input by default
// dtype is used instead when attribute is set
if
(
not
contains
(
valid_types
,
args
[
0
]
->
get_shape
().
type
()))
MIGRAPHX_THROW
(
opd
.
op_name
+
": invalid output type: "
+
std
::
to_string
(
args
[
0
]
->
get_shape
().
type
())
+
". Valid types are float, half, and double."
);
out_shape
=
use_dtype
?
shape
{
out_type
,
args
[
0
]
->
get_shape
().
lens
()}
:
args
[
0
]
->
get_shape
();
}
else
{
MIGRAPHX_THROW
(
opd
.
op_name
+
": cannot deduce shape without shape attribute or argument."
);
}
std
::
mt19937
gen
(
seed
);
std
::
uniform_real_distribution
<>
d
(
high
,
low
);
std
::
vector
<
double
>
rand_vals
(
out_shape
.
elements
());
std
::
generate
(
rand_vals
.
begin
(),
rand_vals
.
end
(),
[
&
]()
{
return
d
(
gen
);
});
return
info
.
add_literal
(
literal
{
out_shape
,
rand_vals
});
}
};
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/CMakeLists.txt
View file @
3a474fca
...
@@ -59,6 +59,7 @@ add_library(migraphx_device
...
@@ -59,6 +59,7 @@ add_library(migraphx_device
device/mul.cpp
device/mul.cpp
device/mul_add.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/pad.cpp
device/pad.cpp
device/pow.cpp
device/pow.cpp
device/prelu.cpp
device/prelu.cpp
...
@@ -143,6 +144,7 @@ add_library(migraphx_gpu
...
@@ -143,6 +144,7 @@ add_library(migraphx_gpu
lrn.cpp
lrn.cpp
leaky_relu.cpp
leaky_relu.cpp
mlir_conv.cpp
mlir_conv.cpp
multinomial.cpp
pack_args.cpp
pack_args.cpp
pack_int8_args.cpp
pack_int8_args.cpp
pad.cpp
pad.cpp
...
@@ -199,6 +201,7 @@ register_migraphx_gpu_ops(hip_
...
@@ -199,6 +201,7 @@ register_migraphx_gpu_ops(hip_
max
max
min
min
mul
mul
multinomial
pad
pad
pow
pow
prelu
prelu
...
...
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
100755 → 100644
View file @
3a474fca
...
@@ -12,10 +12,6 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -12,10 +12,6 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
#if __AMDGCN_WAVEFRONT_SIZE == 32
#define MIGRAPHX_NO_DPP
#endif
#ifdef MIGRAPHX_NO_DPP
#ifdef MIGRAPHX_NO_DPP
template
<
index_int
N
,
template
<
index_int
N
,
class
Op
,
class
Op
,
...
@@ -98,10 +94,12 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -98,10 +94,12 @@ __device__ void dpp_reduce(T& in, Op op)
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
#if __AMDGCN_WAVEFRONT_SIZE == 64
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
#endif
}
}
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
...
@@ -118,9 +116,11 @@ __device__ inline void dpp_reduce(float& x, sum)
...
@@ -118,9 +116,11 @@ __device__ inline void dpp_reduce(float& x, sum)
"s_nop 1
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc
\n
"
"v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc
\n
"
"s_nop 1
\n
"
"s_nop 1
\n
"
#if __AMDGCN_WAVEFRONT_SIZE == 64
"v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa
\n
"
"v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa
\n
"
"s_nop 1
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc
\n
"
"v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc
\n
"
#endif
"s_nop 1
\n
"
"s_nop 1
\n
"
:
"=v"
(
x
)
:
"=v"
(
x
)
:
"0"
(
x
));
:
"0"
(
x
));
...
@@ -135,21 +135,27 @@ template <index_int N,
...
@@ -135,21 +135,27 @@ template <index_int N,
MIGRAPHX_REQUIRES
(
not
std
::
is_integral
<
ForStride
>{})
>
MIGRAPHX_REQUIRES
(
not
std
::
is_integral
<
ForStride
>{})
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
ForStride
fs
,
F
f
)
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
ForStride
fs
,
F
f
)
{
{
#if __AMDGCN_WAVEFRONT_SIZE == 32
constexpr
index_int
nthreads
=
16
;
#else
constexpr
index_int
nthreads
=
64
;
#endif
using
type
=
decltype
(
f
(
deduce_for_stride
(
fs
)));
using
type
=
decltype
(
f
(
deduce_for_stride
(
fs
)));
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
/
64
];
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
/
nthreads
];
type
x
=
init
;
type
x
=
init
;
fs
([
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
fs
([
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
dpp_reduce
(
x
,
op
);
dpp_reduce
(
x
,
op
);
const
auto
ldsidx
=
idx
.
local
/
64
;
const
auto
ldsidx
=
idx
.
local
/
nthreads
;
if
((
idx
.
local
%
64
)
==
63
)
if
((
idx
.
local
%
nthreads
)
==
nthreads
-
1
)
{
{
buffer
[
ldsidx
]
=
x
;
buffer
[
ldsidx
]
=
x
;
}
}
__syncthreads
();
__syncthreads
();
type
y
=
init
;
type
y
=
init
;
for
(
index_int
i
=
0
;
i
<
idx
.
nlocal
()
/
64
;
i
++
)
for
(
index_int
i
=
0
;
i
<
idx
.
nlocal
()
/
nthreads
;
i
++
)
{
{
y
=
op
(
y
,
buffer
[
i
]);
y
=
op
(
y
,
buffer
[
i
]);
}
}
...
...
src/targets/gpu/device/layernorm.cpp
100755 → 100644
View file @
3a474fca
...
@@ -8,6 +8,14 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -8,6 +8,14 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
#ifndef MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC
#if __AMDGCN_WAVEFRONT_SIZE == 32
#define MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC 1
#else
#define MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC 0
#endif
#endif
template
<
class
T
>
template
<
class
T
>
struct
vector_type
struct
vector_type
{
{
...
@@ -86,10 +94,13 @@ __device__ void layernorm(index_int i,
...
@@ -86,10 +94,13 @@ __device__ void layernorm(index_int i,
const
bool
in_range
=
idx
.
local
<
relements_v
;
const
bool
in_range
=
idx
.
local
<
relements_v
;
auto
mean
=
[
&
](
auto
z
)
{
auto
mean
=
[
&
](
auto
z
)
{
return
auto_block_reduce
<
MaxBlockSize
>
(
auto
m
=
auto_block_reduce
<
MaxBlockSize
>
(
idx
,
sum
{},
value_type
(
0
),
relements_v
,
[
=
](
auto
)
{
return
z
;
})
/
idx
,
sum
{},
value_type
(
0
),
relements_v
,
[
=
](
auto
)
{
return
z
;
})
/
value_type
(
relements
);
value_type
(
relements
);
#if MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC
__builtin_amdgcn_s_barrier
();
#endif
return
m
;
};
};
// m = x - mean(x)
// m = x - mean(x)
...
...
src/targets/gpu/device/multinomial.cpp
0 → 100644
View file @
3a474fca
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/multinomial.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
class
Iterator
,
class
T
>
constexpr
Iterator
upper_bound
(
Iterator
first
,
Iterator
last
,
const
T
&
value
)
{
Iterator
it
;
typename
std
::
iterator_traits
<
Iterator
>::
difference_type
count
;
typename
std
::
iterator_traits
<
Iterator
>::
difference_type
step
;
count
=
std
::
distance
(
first
,
last
);
while
(
count
>
0
)
{
it
=
first
;
step
=
count
/
2
;
std
::
advance
(
it
,
step
);
if
(
!
(
value
<
*
it
))
{
first
=
++
it
;
count
-=
step
+
1
;
}
else
count
=
step
;
}
return
first
;
}
void
multinomial
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg0
,
const
argument
&
arg1
)
{
size_t
batch_size
=
arg0
.
get_shape
().
lens
().
front
();
size_t
class_size
=
arg0
.
get_shape
().
lens
().
back
();
size_t
sample_size
=
result
.
get_shape
().
lens
().
back
();
hip_visit_all
(
arg0
,
arg1
)([
&
](
auto
cdf
,
auto
dist
)
{
result
.
visit
([
&
](
auto
out
)
{
hip_visit_views
(
out
)([
&
](
auto
output
)
{
gs_launch
(
stream
,
batch_size
*
sample_size
)([
=
](
auto
i
)
__device__
{
auto
idx
=
output
.
get_shape
().
multi
(
i
);
auto
cdf_begin
=
cdf
.
begin
()
+
(
idx
.
front
()
*
class_size
);
auto
cdf_end
=
cdf_begin
+
class_size
;
auto
sample_iter
=
upper_bound
(
cdf_begin
,
cdf_end
,
dist
[
i
]
*
*
(
std
::
prev
(
cdf_end
)));
output
[
i
]
=
std
::
distance
(
cdf_begin
,
sample_iter
);
});
});
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/include/migraphx/gpu/device/multinomial.hpp
0 → 100644
View file @
3a474fca
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
multinomial
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg0
,
const
argument
&
arg1
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/multinomial.hpp
0 → 100644
View file @
3a474fca
#ifndef MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP
#include <migraphx/op/multinomial.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_multinomial
{
op
::
multinomial
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::multinomial"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/lowering.cpp
View file @
3a474fca
...
@@ -164,6 +164,7 @@ struct miopen_apply
...
@@ -164,6 +164,7 @@ struct miopen_apply
add_extend_op
(
"leaky_relu"
);
add_extend_op
(
"leaky_relu"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"multinomial"
);
add_extend_op
(
"pad"
);
add_extend_op
(
"pad"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"prefix_scan_sum"
);
add_extend_op
(
"prefix_scan_sum"
);
...
...
src/targets/gpu/multinomial.cpp
0 → 100644
View file @
3a474fca
#include <migraphx/gpu/multinomial.hpp>
#include <migraphx/gpu/device/multinomial.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/check_shapes.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_multinomial
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
3
).
only_dims
(
2
).
standard
();
inputs
.
pop_back
();
return
op
.
compute_shape
(
inputs
);
}
argument
hip_multinomial
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
multinomial
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
args
[
1
]);
return
args
.
back
();
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
test/onnx/depthtospace_crd_test.onnx
0 → 100644
View file @
3a474fca
depthtospace_crd_test:
6
xy"DepthToSpace*
blocksize*
mode"CRDdepthtospace_crd_testZ
x
b
y
B
\ No newline at end of file
test/onnx/depthtospace_simple_test.onnx
0 → 100644
View file @
3a474fca
depthtospace_simple_test:
6
xy"DepthToSpace*
blocksize*
mode"DCRdepthtospace_simple_testZ
x
b
y
B
\ No newline at end of file
test/onnx/depthtospace_test.onnx
0 → 100644
View file @
3a474fca
depthtospace_test:
6
xy"DepthToSpace*
blocksize*
mode"DCRdepthtospace_testZ
x
b
y
B
\ No newline at end of file
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment