Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
a24ed87e
Unverified
Commit
a24ed87e
authored
Dec 05, 2023
by
Chris Austen
Committed by
GitHub
Dec 05, 2023
Browse files
Merge branch 'develop' into optimize_jenkinsfile
parents
6481cd69
a09dc502
Changes
391
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1112 additions
and
309 deletions
+1112
-309
src/include/migraphx/op/isinf.hpp
src/include/migraphx/op/isinf.hpp
+16
-15
src/include/migraphx/op/multinomial.hpp
src/include/migraphx/op/multinomial.hpp
+70
-13
src/include/migraphx/op/nearbyint.hpp
src/include/migraphx/op/nearbyint.hpp
+22
-17
src/include/migraphx/op/normalize_attribute.hpp
src/include/migraphx/op/normalize_attribute.hpp
+2
-0
src/include/migraphx/op/pooling.hpp
src/include/migraphx/op/pooling.hpp
+40
-11
src/include/migraphx/op/prefix_scan_op.hpp
src/include/migraphx/op/prefix_scan_op.hpp
+6
-0
src/include/migraphx/op/quantizelinear.hpp
src/include/migraphx/op/quantizelinear.hpp
+5
-5
src/include/migraphx/op/random_uniform.hpp
src/include/migraphx/op/random_uniform.hpp
+2
-3
src/include/migraphx/op/scatternd_max.hpp
src/include/migraphx/op/scatternd_max.hpp
+13
-12
src/include/migraphx/op/scatternd_min.hpp
src/include/migraphx/op/scatternd_min.hpp
+15
-12
src/include/migraphx/op/scatternd_op.hpp
src/include/migraphx/op/scatternd_op.hpp
+3
-2
src/include/migraphx/op/slice.hpp
src/include/migraphx/op/slice.hpp
+323
-136
src/include/migraphx/op/unary.hpp
src/include/migraphx/op/unary.hpp
+5
-4
src/include/migraphx/op/unique.hpp
src/include/migraphx/op/unique.hpp
+323
-0
src/include/migraphx/operators.hpp
src/include/migraphx/operators.hpp
+4
-1
src/include/migraphx/par.hpp
src/include/migraphx/par.hpp
+133
-0
src/include/migraphx/par_for.hpp
src/include/migraphx/par_for.hpp
+7
-77
src/include/migraphx/rewrite_pooling.hpp
src/include/migraphx/rewrite_pooling.hpp
+1
-0
src/include/migraphx/shape.hpp
src/include/migraphx/shape.hpp
+3
-1
src/include/migraphx/simple_par_for.hpp
src/include/migraphx/simple_par_for.hpp
+119
-0
No files found.
src/
targets/gpu/
include/migraphx/
gpu/int8_conv_pack
.hpp
→
src/include/migraphx/
op/isinf
.hpp
View file @
a24ed87e
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -21,31 +21,32 @@
...
@@ -21,31 +21,32 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_INT8_CONV_PACK
_HPP
#ifndef MIGRAPHX_GUARD_
OPERATORS_ISINF
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_INT8_CONV_PACK
_HPP
#define MIGRAPHX_GUARD_
OPERATORS_ISINF
_HPP
#include <migraphx/
argument
.hpp>
#include <migraphx/
op/unary
.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
op
{
struct
context
;
struct
isinf
:
unary
<
isinf
>
struct
miopen_int8_conv_pack
{
{
std
::
string
name
()
const
{
return
"gpu::int8_conv_pack"
;
}
auto
apply
()
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
{
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
return
[
&
](
auto
x
)
{
return
std
::
isinf
(
static_cast
<
double
>
(
x
));
};
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
}
std
::
string
name
()
const
{
return
"isinf"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
{
return
shapes
.
size
()
-
1
;
return
unary
<
isinf
>::
compute_shape
(
std
::
move
(
inputs
)).
with_type
(
shape
::
bool_type
)
;
}
}
};
};
}
// namespace
gpu
}
// namespace
op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/include/migraphx/op/multinomial.hpp
View file @
a24ed87e
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -21,11 +21,52 @@
...
@@ -21,11 +21,52 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
/**
* * Multinomial or categorical distribution. Performs a sampling of random input
* and returns a count of
* each category, or bucket. This does not require the standard multinomial
* distribution but instead takes a probability distribution, i.e. cumulative
* distribution function (CDF) as its first input.
*
* Inputs: args[0] - a tensor of probabilities for each category. Values are
* cumulative density function
* totals as provided by operation prefix_scan_sum. Values are
* cumulative probabilities (i.e. start with any set of numbers > 0
* and then apply prefix_scan_sum). Values do not need to be
* normalized to sum to 1; this is done in runtime computation.
*
* This input has Rank 2. Dimension 0 is batch #, so that there can be
* a different CDF for each iteration in the batch. The size of dimension
* 1 is the number of categories.
*
* args[1] - a tensor of random numbers. The last dimension is the sample
* size, i.e. the number of
* random samples in each iteration of the batch. Nominally
* has two dimensions where the first dimension is batch size, but
* any reshaping such that the total
* number of elements is (batch_size * sample_size) is legal.
*
* Values as created by a std::mt19937 like this:
*
* size_t sample_size = 100000;
* float seed = 0.0f;
* std::mt19937 gen(seed);
* std::uniform_real_distribution<> dis(0.0, 1.0);
* std::vector<float> rand_samples(sample_size);
* std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return
* dis(gen); });
*
* Output: A 2D vector of category each input. Dimensions are (Input 1[first], Input
2[last]).
*
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/reflect.hpp>
#include <random>
#include <random>
...
@@ -47,22 +88,35 @@ struct multinomial
...
@@ -47,22 +88,35 @@ struct multinomial
std
::
string
name
()
const
{
return
"multinomial"
;
}
std
::
string
name
()
const
{
return
"multinomial"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
only_dims
(
2
);
check_shapes
{
inputs
,
*
this
,
true
}.
has
(
2
).
only_dims
(
2
);
size_t
sample_size
=
inputs
.
back
().
lens
().
back
();
if
(
not
contains
({
shape
::
int32_type
,
shape
::
int64_type
},
dtype
))
if
(
inputs
.
back
().
ndim
()
<
1
)
MIGRAPHX_THROW
(
MIGRAPHX_THROW
(
"Multinomial: Second input shape (sample) has no dimensions"
);
"Multinomial: Invalid output type. Valid types are int32_type and int64_type."
);
if
(
dtype
==
shape
::
bool_type
)
MIGRAPHX_THROW
(
"Multinomial: boolean output type invalid."
);
return
{
dtype
,
{
inputs
.
front
().
lens
().
front
(),
sample_size
}};
// Output takes one dimension from each of the two input shapes. If they are both fixed,
// return a static shape
if
((
not
inputs
.
front
().
dynamic
())
or
(
inputs
.
front
().
dyn_dims
().
front
().
is_fixed
()))
{
if
((
not
inputs
.
back
().
dynamic
())
or
(
inputs
.
back
().
dyn_dims
().
back
().
is_fixed
()))
{
size_t
batch
=
{
inputs
.
front
().
max_lens
().
front
()};
size_t
sample_size
{
inputs
.
back
().
max_lens
().
back
()};
return
{
dtype
,
{
batch
,
sample_size
}};
}
}
return
{
dtype
,
{
inputs
.
front
().
to_dynamic
().
dyn_dims
().
front
(),
inputs
.
back
().
to_dynamic
().
dyn_dims
().
back
()}};
}
}
argument
compute
(
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
{
argument
result
{
out
put_shape
};
argument
result
{
dyn_out
.
com
put
ed
_shape
};
size_t
batch_size
=
out
put_shape
.
lens
().
front
();
size_t
batch_size
=
dyn_out
.
com
put
ed
_shape
.
lens
().
front
();
size_t
class_size
=
args
[
0
].
get_shape
().
lens
().
back
();
size_t
class_size
=
args
[
0
].
get_shape
().
lens
().
back
();
size_t
sample_size
=
out
put_shape
.
lens
().
back
();
size_t
sample_size
=
dyn_out
.
com
put
ed
_shape
.
lens
().
back
();
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
cdf
,
auto
dist
)
{
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
cdf
,
auto
dist
)
{
result
.
visit
([
&
](
auto
output
)
{
result
.
visit
([
&
](
auto
output
)
{
...
@@ -70,13 +124,16 @@ struct multinomial
...
@@ -70,13 +124,16 @@ struct multinomial
auto
idx
=
args
[
1
].
get_shape
().
multi
(
i
);
auto
idx
=
args
[
1
].
get_shape
().
multi
(
i
);
auto
cdf_begin
=
cdf
.
begin
()
+
(
idx
[
0
]
*
class_size
);
auto
cdf_begin
=
cdf
.
begin
()
+
(
idx
[
0
]
*
class_size
);
auto
cdf_end
=
cdf_begin
+
class_size
;
auto
cdf_end
=
cdf_begin
+
class_size
;
// std::upper_bound returns an iterator to the bucket the value belongs in,
// when normalized by the probability distribution dist
auto
sample_iter
=
auto
sample_iter
=
std
::
upper_bound
(
cdf_begin
,
cdf_end
,
dist
[
i
]
*
*
(
std
::
prev
(
cdf_end
)));
std
::
upper_bound
(
cdf_begin
,
cdf_end
,
dist
[
i
]
*
*
(
std
::
prev
(
cdf_end
)));
// convert iterator to an integer index
output
[
i
]
=
std
::
distance
(
cdf_begin
,
sample_iter
);
output
[
i
]
=
std
::
distance
(
cdf_begin
,
sample_iter
);
});
});
});
});
});
});
return
result
;
return
result
;
}
}
};
};
...
...
src/
targets/gpu/gather.c
pp
→
src/
include/migraphx/op/nearbyint.h
pp
View file @
a24ed87e
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -21,25 +21,30 @@
...
@@ -21,25 +21,30 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/gpu/gather.hpp>
#ifndef MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
#include <migraphx/gpu/context.hpp>
#define MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
#include <fenv.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
op
{
struct
nearbyint
:
unary
<
nearbyint
>
shape
hip_gather
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
inputs
.
pop_back
();
return
op
.
normalize_compute_shape
(
inputs
);
}
argument
hip_gather
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
{
return
device
::
gather
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
[
0
],
args
[
1
],
op
.
axis
);
auto
apply
()
const
}
{
return
[](
auto
x
)
{
}
// namespace gpu
auto
rounding_mode
=
fegetround
();
fesetround
(
FE_TONEAREST
);
return
std
::
nearbyint
(
x
);
fesetround
(
rounding_mode
);
};
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
#endif
src/include/migraphx/op/normalize_attribute.hpp
View file @
a24ed87e
...
@@ -40,6 +40,8 @@ namespace op {
...
@@ -40,6 +40,8 @@ namespace op {
* 2. use_rank (default) vs use_len:
* 2. use_rank (default) vs use_len:
* `use_rank` sets the max value/index of the attribute as the rank of lens.
* `use_rank` sets the max value/index of the attribute as the rank of lens.
* `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
* `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
* Uses the dynamic_dimension.max value for dynamic shapes. Returns the original vector
* (no normalization) if any of dynamic_dimension[axes] are not fixed.
* 3. `clip_min` vs. `not_clip_min` (default):
* 3. `clip_min` vs. `not_clip_min` (default):
* Clip values less than the minimum to the minimum or not.
* Clip values less than the minimum to the minimum or not.
* 4. `include_min` vs. `exclude_min` (default):
* 4. `include_min` vs. `exclude_min` (default):
...
...
src/include/migraphx/op/pooling.hpp
View file @
a24ed87e
...
@@ -70,7 +70,8 @@ struct pooling
...
@@ -70,7 +70,8 @@ struct pooling
// 2 smaller than the input tensor rank (NCHW layout)
// 2 smaller than the input tensor rank (NCHW layout)
std
::
vector
<
std
::
size_t
>
lengths
=
{
1
,
1
};
std
::
vector
<
std
::
size_t
>
lengths
=
{
1
,
1
};
// Dilations are not supported at this time.
// Spacing between the elements of the pooling kernel. Must be the same ndim as lengths.
std
::
vector
<
std
::
size_t
>
dilations
=
{
1
,
1
};
// ceiling mode is a flag affecting output size
// ceiling mode is a flag affecting output size
// or equivalently, placements of the pooling kernel.
// or equivalently, placements of the pooling kernel.
...
@@ -99,6 +100,7 @@ struct pooling
...
@@ -99,6 +100,7 @@ struct pooling
f
(
self
.
padding_mode
,
"padding_mode"
),
f
(
self
.
padding_mode
,
"padding_mode"
),
f
(
self
.
stride
,
"stride"
),
f
(
self
.
stride
,
"stride"
),
f
(
self
.
lengths
,
"lengths"
),
f
(
self
.
lengths
,
"lengths"
),
f
(
self
.
dilations
,
"dilations"
),
f
(
self
.
ceil_mode
,
"ceil_mode"
),
f
(
self
.
ceil_mode
,
"ceil_mode"
),
f
(
self
.
lp_order
,
"lp_order"
),
f
(
self
.
lp_order
,
"lp_order"
),
f
(
self
.
dyn_global
,
"dyn_global"
));
f
(
self
.
dyn_global
,
"dyn_global"
));
...
@@ -112,14 +114,17 @@ struct pooling
...
@@ -112,14 +114,17 @@ struct pooling
return
;
return
;
if
((
padding_mode
!=
default_
and
padding
.
size
()
!=
stride
.
size
()
and
if
((
padding_mode
!=
default_
and
padding
.
size
()
!=
stride
.
size
()
and
(
padding
.
size
())
!=
stride
.
size
()
*
2
)
or
(
padding
.
size
())
!=
stride
.
size
()
*
2
)
or
stride
.
size
()
!=
lengths
.
size
())
stride
.
size
()
!=
lengths
.
size
()
or
dilations
.
size
()
!=
lengths
.
size
())
{
{
MIGRAPHX_THROW
(
"POOLING: inconsistent attribute sizes"
);
MIGRAPHX_THROW
(
"POOLING: inconsistent attribute sizes"
);
}
}
if
(
std
::
any_of
(
lengths
.
begin
(),
lengths
.
end
(),
[
&
](
auto
i
)
{
return
(
i
==
0
);
})
or
std
::
any_of
(
stride
.
begin
(),
stride
.
end
(),
[
&
](
auto
i
)
{
return
(
i
==
0
);
}))
const
auto
is_zero
=
[](
auto
el
)
{
return
el
==
0
;
};
if
(
std
::
any_of
(
lengths
.
begin
(),
lengths
.
end
(),
is_zero
)
or
std
::
any_of
(
stride
.
begin
(),
stride
.
end
(),
is_zero
)
or
std
::
any_of
(
dilations
.
begin
(),
dilations
.
end
(),
is_zero
))
{
{
MIGRAPHX_THROW
(
"POOLING: size 0 pooling kernel or stride"
);
MIGRAPHX_THROW
(
"POOLING: size 0 pooling kernel or stride
or dilations
"
);
}
}
// TODO: update lowering to run the reference
// TODO: update lowering to run the reference
...
@@ -142,6 +147,11 @@ struct pooling
...
@@ -142,6 +147,11 @@ struct pooling
value
attributes
()
const
{
return
{{
"normalize_padding"
,
"padding"
}};
}
value
attributes
()
const
{
return
{{
"normalize_padding"
,
"padding"
}};
}
inline
std
::
size_t
dilate_dim
(
std
::
size_t
dim
,
std
::
size_t
dilation
)
const
{
return
1
+
dilation
*
(
dim
-
1
);
}
std
::
vector
<
std
::
size_t
>
calc_spatial_dim_out
(
const
std
::
vector
<
std
::
size_t
>&
input_lens
,
std
::
vector
<
std
::
size_t
>
calc_spatial_dim_out
(
const
std
::
vector
<
std
::
size_t
>&
input_lens
,
std
::
size_t
kdims
)
const
std
::
size_t
kdims
)
const
{
{
...
@@ -151,8 +161,9 @@ struct pooling
...
@@ -151,8 +161,9 @@ struct pooling
std
::
size_t
padding_factor
=
2
*
padding
[
i
];
std
::
size_t
padding_factor
=
2
*
padding
[
i
];
if
(
padding
.
size
()
==
2
*
kdims
)
if
(
padding
.
size
()
==
2
*
kdims
)
padding_factor
=
padding
[
i
]
+
padding
[
i
+
kdims
];
padding_factor
=
padding
[
i
]
+
padding
[
i
+
kdims
];
std
::
size_t
dilated_length
=
dilate_dim
(
lengths
[
i
],
dilations
[
i
]);
std
::
size_t
dim_size
;
std
::
size_t
dim_size
;
if
(
input_lens
[
i
+
2
]
+
padding_factor
<
length
s
[
i
]
)
if
(
input_lens
[
i
+
2
]
+
padding_factor
<
dilated_
length
)
{
{
if
(
padding_mode
==
default_
)
if
(
padding_mode
==
default_
)
MIGRAPHX_THROW
(
"POOLING: not enough padding for the given kernel size"
);
MIGRAPHX_THROW
(
"POOLING: not enough padding for the given kernel size"
);
...
@@ -162,7 +173,7 @@ struct pooling
...
@@ -162,7 +173,7 @@ struct pooling
}
}
else
else
{
{
dim_size
=
input_lens
[
i
+
2
]
+
padding_factor
-
length
s
[
i
]
;
dim_size
=
input_lens
[
i
+
2
]
+
padding_factor
-
dilated_
length
;
}
}
std
::
size_t
len
=
std
::
size_t
len
=
(
ceil_mode
)
(
ceil_mode
)
...
@@ -331,6 +342,7 @@ struct pooling
...
@@ -331,6 +342,7 @@ struct pooling
int
start
=
static_cast
<
int
>
(
idx_o
[
dim
]
*
stride
[
d_2
])
-
int
start
=
static_cast
<
int
>
(
idx_o
[
dim
]
*
stride
[
d_2
])
-
static_cast
<
int
>
(
padding_vals
[
d_2
]);
static_cast
<
int
>
(
padding_vals
[
d_2
]);
int
end
;
int
end
;
std
::
size_t
dilated_kernel_dim
=
dilate_dim
(
kernel_dims
[
d_2
],
dilations
[
d_2
]);
// NOLINT
// NOLINT
if
(
count_include_pad
and
ceil_mode
and
(
mode
!=
pooling_mode
::
max
))
if
(
count_include_pad
and
ceil_mode
and
(
mode
!=
pooling_mode
::
max
))
{
{
...
@@ -340,15 +352,14 @@ struct pooling
...
@@ -340,15 +352,14 @@ struct pooling
// padding. Clip out-of-bounds indexes but not padding.
// padding. Clip out-of-bounds indexes but not padding.
// Check if this kernel extends beyond the padding at end of dimension
// Check if this kernel extends beyond the padding at end of dimension
end
=
std
::
min
(
start
+
kernel_dim
s
[
d_2
]
,
end
=
std
::
min
(
start
+
dilated_
kernel_dim
,
in_lens
[
dim
]
+
static_cast
<
int
>
(
padding_vals
[
d_2
]));
in_lens
[
dim
]
+
static_cast
<
int
>
(
padding_vals
[
d_2
]));
}
}
else
else
{
{
// In non-ceiling mode, when
// In non-ceiling mode, when
// count_include_pad is false, or for max pooling, clip off padding.
// count_include_pad is false, or for max pooling, clip off padding.
end
=
std
::
min
(
start
+
kernel_dims
[
d_2
],
in_lens
[
dim
]);
end
=
std
::
min
(
start
+
dilated_kernel_dim
,
in_lens
[
dim
]);
start
=
std
::
max
(
start
,
0
);
}
}
win_start
.
push_back
(
start
);
win_start
.
push_back
(
start
);
if
(
end
<
start
)
if
(
end
<
start
)
...
@@ -366,6 +377,16 @@ struct pooling
...
@@ -366,6 +377,16 @@ struct pooling
// for each element in the window...
// for each element in the window...
shape_for_each
(
win_shape
,
[
&
](
const
auto
&
idx_w
)
{
shape_for_each
(
win_shape
,
[
&
](
const
auto
&
idx_w
)
{
// Skip elements that belong to the dilated area
for
(
size_t
axis
=
0
;
axis
<
idx_w
.
size
();
++
axis
)
{
if
(
idx_w
[
axis
]
%
dilations
[
axis
])
{
pool_size
-=
1
;
return
;
}
}
// the coordinates of this element
// the coordinates of this element
auto
idx
=
idx_o
;
auto
idx
=
idx_o
;
...
@@ -390,7 +411,15 @@ struct pooling
...
@@ -390,7 +411,15 @@ struct pooling
// this is a padding element. Padding locations
// this is a padding element. Padding locations
// don't contribute to average or max pooling total but can play in
// don't contribute to average or max pooling total but can play in
// lpnorm pooling.
// lpnorm pooling.
output_val
=
op
(
output_val
,
0
);
if
(
mode
==
pooling_mode
::
lpnorm
)
{
output_val
=
op
(
output_val
,
op
.
template
init
<
Type
>());
}
if
(
mode
==
pooling_mode
::
average
)
{
// Ignore padding
pool_size
-=
1
;
}
}
}
});
});
output
[
i
]
=
Type
(
op
.
final
(
output_val
,
pool_size
));
output
[
i
]
=
Type
(
op
.
final
(
output_val
,
pool_size
));
...
...
src/include/migraphx/op/prefix_scan_op.hpp
View file @
a24ed87e
...
@@ -22,6 +22,12 @@
...
@@ -22,6 +22,12 @@
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
/**
* Parent struct for prefix scan ops. A prefix scan is a mathematical entity useful
* in parallelizing various computations. Given a list of numbers, a prefix scan
* op returns an equal size list of running totals of the values. Other operations
* besides addition can be supported by child ops.
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
#ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
...
...
src/include/migraphx/op/quantizelinear.hpp
View file @
a24ed87e
...
@@ -30,11 +30,11 @@
...
@@ -30,11 +30,11 @@
#include <migraphx/par_for.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/value.hpp>
#include <migraphx/value.hpp>
#include <cmath>
#include <cmath>
#include <fenv.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
namespace
op
{
struct
quantizelinear
struct
quantizelinear
{
{
std
::
string
name
()
const
{
return
"quantizelinear"
;
}
std
::
string
name
()
const
{
return
"quantizelinear"
;
}
...
@@ -71,26 +71,26 @@ struct quantizelinear
...
@@ -71,26 +71,26 @@ struct quantizelinear
{
{
y_zero_point
=
args
.
at
(
2
);
y_zero_point
=
args
.
at
(
2
);
}
}
argument
result
{
output_shape
};
argument
result
{
output_shape
};
auto
rounding_mode
=
fegetround
();
fesetround
(
FE_TONEAREST
);
visit_all
(
result
,
y_zero_point
)([
&
](
auto
output
,
auto
zero_pts
)
{
visit_all
(
result
,
y_zero_point
)([
&
](
auto
output
,
auto
zero_pts
)
{
visit_all
(
x
,
y_scale
)([
&
](
auto
input
,
auto
scales
)
{
visit_all
(
x
,
y_scale
)([
&
](
auto
input
,
auto
scales
)
{
using
quant_type
=
typename
decltype
(
output
)
::
value_type
;
using
quant_type
=
typename
decltype
(
output
)
::
value_type
;
auto
min_value
=
std
::
numeric_limits
<
quant_type
>::
min
();
auto
min_value
=
std
::
numeric_limits
<
quant_type
>::
min
();
auto
max_value
=
std
::
numeric_limits
<
quant_type
>::
max
();
auto
max_value
=
std
::
numeric_limits
<
quant_type
>::
max
();
par_for
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
par_for
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
int64_t
quantized
=
static_cast
<
int64_t
>
(
std
::
round
(
input
[
i
]
/
scales
[
i
]))
+
int64_t
quantized
=
static_cast
<
int64_t
>
(
std
::
nearbyint
(
input
[
i
]
/
scales
[
i
]))
+
static_cast
<
int64_t
>
(
zero_pts
[
i
]);
static_cast
<
int64_t
>
(
zero_pts
[
i
]);
output
[
i
]
=
std
::
max
(
static_cast
<
int64_t
>
(
min_value
),
output
[
i
]
=
std
::
max
(
static_cast
<
int64_t
>
(
min_value
),
std
::
min
(
static_cast
<
int64_t
>
(
max_value
),
quantized
));
std
::
min
(
static_cast
<
int64_t
>
(
max_value
),
quantized
));
});
});
});
});
});
});
fesetround
(
rounding_mode
);
return
result
;
return
result
;
}
}
};
};
}
// namespace op
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/include/migraphx/op/random_uniform.hpp
View file @
a24ed87e
...
@@ -65,11 +65,10 @@ struct random_uniform
...
@@ -65,11 +65,10 @@ struct random_uniform
return
inputs
.
at
(
1
);
return
inputs
.
at
(
1
);
}
}
argument
compute
(
const
shape
&
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
{
// Output goes into the passed buffer, not the shape output.
// Output goes into the passed buffer, not the shape output.
auto
result
=
args
[
1
];
argument
result
{
dyn_out
.
computed_shape
};
uint64_t
local_seed
=
args
[
0
].
at
<
uint64_t
>
(
0
);
uint64_t
local_seed
=
args
[
0
].
at
<
uint64_t
>
(
0
);
std
::
mt19937
gen
(
local_seed
);
std
::
mt19937
gen
(
local_seed
);
...
...
src/
targets/gpu/
include/migraphx/
gpu/pack_int8_args
.hpp
→
src/include/migraphx/
op/scatternd_max
.hpp
View file @
a24ed87e
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -21,25 +21,26 @@
...
@@ -21,25 +21,26 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_PACK_INT8_ARGS
_HPP
#ifndef MIGRAPHX_GUARD_
OPERATORS_SCATTERND_MAX
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_PACK_INT8_ARGS
_HPP
#define MIGRAPHX_GUARD_
OPERATORS_SCATTERND_MAX
_HPP
#include <migraphx/program.hpp>
#include <migraphx/op/scatternd_op.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
namespace
gpu
{
struct
scatternd_max
:
scatternd_op
<
scatternd_max
>
struct
MIGRAPHX_GPU_EXPORT
pack_int8_args
{
{
std
::
string
name
()
const
{
return
"gpu::pack_int8_args"
;
}
scatternd_max
()
{}
void
apply
(
module
&
m
)
const
;
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
auto
reduction
()
const
{
return
[](
auto
&
x
,
const
auto
&
y
)
{
x
=
std
::
max
(
x
,
y
);
};
}
};
};
}
// namespace
gpu
}
// namespace
op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/
targets/gpu/
include/migraphx/
gpu/device/g
at
h
er.hpp
→
src/include/migraphx/
op/sc
at
t
er
nd_min
.hpp
View file @
a24ed87e
/*
/*
* The MIT License (MIT)
* The MIT License (MIT)
*
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* of this software and associated documentation files (the "Software"), to deal
...
@@ -21,23 +21,26 @@
...
@@ -21,23 +21,26 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_DEVICE_G
AT
H
ER_HPP
#ifndef MIGRAPHX_GUARD_
OPERATORS_SC
AT
T
ER
ND_MIN
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_DEVICE_G
AT
H
ER_HPP
#define MIGRAPHX_GUARD_
OPERATORS_SC
AT
T
ER
ND_MIN
_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/scatternd_op.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
op
{
namespace
device
{
argument
MIGRAPHX_DEVICE_EXPORT
struct
scatternd_min
:
scatternd_op
<
scatternd_min
>
gather
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
argument
arg2
,
int64_t
axis
);
{
scatternd_min
()
{}
}
// namespace device
auto
reduction
()
const
}
// namespace gpu
{
return
[](
auto
&
x
,
const
auto
&
y
)
{
x
=
std
::
min
(
x
,
y
);
};
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/include/migraphx/op/scatternd_op.hpp
View file @
a24ed87e
...
@@ -121,7 +121,8 @@ struct scatternd_op : op_name<Derived>
...
@@ -121,7 +121,8 @@ struct scatternd_op : op_name<Derived>
auto
k
=
indices_shape
.
lens
().
back
();
auto
k
=
indices_shape
.
lens
().
back
();
auto
q
=
indices_shape
.
ndim
();
auto
q
=
indices_shape
.
ndim
();
auto
r
=
dyn_out
.
computed_shape
.
ndim
();
auto
r
=
dyn_out
.
computed_shape
.
ndim
();
par_for
(
updates_shape
.
elements
(),
[
&
](
const
auto
i
)
{
for
(
auto
i
=
0u
;
i
<
updates_shape
.
elements
();
++
i
)
{
auto
updates_idx
=
updates_std
.
multi
(
i
);
auto
updates_idx
=
updates_std
.
multi
(
i
);
std
::
vector
<
std
::
size_t
>
indices_idx
(
q
,
0
);
std
::
vector
<
std
::
size_t
>
indices_idx
(
q
,
0
);
std
::
copy
(
std
::
copy
(
...
@@ -135,7 +136,7 @@ struct scatternd_op : op_name<Derived>
...
@@ -135,7 +136,7 @@ struct scatternd_op : op_name<Derived>
std
::
copy
(
updates_idx
.
begin
()
+
q
-
1
,
updates_idx
.
end
(),
out_idx
.
begin
()
+
k
);
std
::
copy
(
updates_idx
.
begin
()
+
q
-
1
,
updates_idx
.
end
(),
out_idx
.
begin
()
+
k
);
self
.
reduction
()(
output
[
dyn_out
.
computed_shape
.
index
(
out_idx
)],
updates
[
i
]);
self
.
reduction
()(
output
[
dyn_out
.
computed_shape
.
index
(
out_idx
)],
updates
[
i
]);
}
);
}
});
});
});
});
...
...
src/include/migraphx/op/slice.hpp
View file @
a24ed87e
...
@@ -31,6 +31,7 @@
...
@@ -31,6 +31,7 @@
#include <migraphx/dyn_output.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/normalize_attributes.hpp>
#include <migraphx/normalize_attributes.hpp>
#include <array>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -38,6 +39,18 @@ namespace op {
...
@@ -38,6 +39,18 @@ namespace op {
/**
/**
* Slice operator that accepts variable axes, starts and ends.
* Slice operator that accepts variable axes, starts and ends.
* All of `starts`, `ends`, and `axes` must be supplied by either
* their attribute or an input (but not both).
*
* Valid calls:
* slice(input); axes, starts, ends set
* slice(input, starts); axes, ends set
* slice(input, ends); starts, axes set
* slice(input, axes); starts, ends set
* slice(input, starts, ends); axes set
* slice(input, starts, axes); ends set
* slice(input, ends, axes); starts set
* slice(input, start, ends, axes); none set
*
*
* Attributes:
* Attributes:
* axes: constant axes to slice over (optional)
* axes: constant axes to slice over (optional)
...
@@ -46,8 +59,8 @@ namespace op {
...
@@ -46,8 +59,8 @@ namespace op {
*
*
* Parameters:
* Parameters:
* data: the input tensor to slice (dynamic or static shape)
* data: the input tensor to slice (dynamic or static shape)
* input_starts: starting indic
i
es of slice (optional, static shape)
* input_starts: starting indices of slice (optional, static shape)
* input_ends: ending indic
i
es of slice (optional, static shape)
* input_ends: ending indices of slice (optional, static shape)
* input_axes: axes to slice over (optional, static shape)
* input_axes: axes to slice over (optional, static shape)
*/
*/
struct
slice
struct
slice
...
@@ -56,6 +69,18 @@ struct slice
...
@@ -56,6 +69,18 @@ struct slice
std
::
vector
<
int64_t
>
starts
{};
std
::
vector
<
int64_t
>
starts
{};
std
::
vector
<
int64_t
>
ends
{};
std
::
vector
<
int64_t
>
ends
{};
/**
* Named arrays for the set attribute possibilities.
*/
static
constexpr
std
::
array
<
bool
,
3
>
all_set
=
{
true
,
true
,
true
};
static
constexpr
std
::
array
<
bool
,
3
>
ends_axes
=
{
false
,
true
,
true
};
static
constexpr
std
::
array
<
bool
,
3
>
starts_axes
=
{
true
,
false
,
true
};
static
constexpr
std
::
array
<
bool
,
3
>
starts_ends
=
{
true
,
true
,
false
};
static
constexpr
std
::
array
<
bool
,
3
>
axes_only
=
{
false
,
false
,
true
};
static
constexpr
std
::
array
<
bool
,
3
>
ends_only
=
{
false
,
true
,
false
};
static
constexpr
std
::
array
<
bool
,
3
>
starts_only
=
{
true
,
false
,
false
};
static
constexpr
std
::
array
<
bool
,
3
>
none_set
=
{
false
,
false
,
false
};
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
{
{
...
@@ -63,24 +88,26 @@ struct slice
...
@@ -63,24 +88,26 @@ struct slice
}
}
/**
/**
* Ensure that attribute vectors axes, starts, and ends are all the same size and values are
* Ensure that attribute axes is within limits.
* within limits.
* Will attempt to normalize starts and ends; but will use the dynamic_dimension.max
* values for dynamic shapes. This makes it so you have to renormalize for
* non-fixed dynamic_dimensions.
*/
*/
value
attributes
()
const
value
attributes
()
const
{
{
value
normalize
=
value
::
object
{};
value
normalize
_axes
=
value
::
object
{};
normalize
[
"axes"
]
=
value
::
array
{
normalize_attribute
::
include_min
};
normalize
_axes
[
"axes"
]
=
value
::
array
{
normalize_attribute
::
include_min
};
normalize
[
"starts"
]
=
value
::
array
{
normalize_attribute
::
clip_max
,
normalize
_axes
[
"starts"
]
=
value
::
array
{
normalize_attribute
::
clip_max
,
normalize_attribute
::
clip_min
,
normalize_attribute
::
clip_min
,
normalize_attribute
::
include_max
,
normalize_attribute
::
include_max
,
normalize_attribute
::
use_len
,
normalize_attribute
::
use_len
,
normalize_attribute
::
include_min
};
normalize_attribute
::
include_min
};
normalize
[
"ends"
]
=
value
::
array
{
normalize_attribute
::
clip_max
,
normalize
_axes
[
"ends"
]
=
value
::
array
{
normalize_attribute
::
clip_max
,
normalize_attribute
::
clip_min
,
normalize_attribute
::
clip_min
,
normalize_attribute
::
include_max
,
normalize_attribute
::
include_max
,
normalize_attribute
::
use_len
,
normalize_attribute
::
use_len
,
normalize_attribute
::
include_min
};
normalize_attribute
::
include_min
};
return
{{
"normalize_axes"
,
normalize
}};
return
{{
"normalize_axes"
,
normalize
_axes
}};
}
}
std
::
string
name
()
const
{
return
"slice"
;
}
std
::
string
name
()
const
{
return
"slice"
;
}
...
@@ -88,7 +115,7 @@ struct slice
...
@@ -88,7 +115,7 @@ struct slice
/**
/**
* Computes the slice output shape dimensions for given starts, ends,and axes.
* Computes the slice output shape dimensions for given starts, ends,and axes.
* Templated to also handle tensor views.
* Templated to also handle tensor views.
* Possib
i
ly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
* Possibly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
* object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
* object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
*/
*/
template
<
class
A
,
class
B
>
template
<
class
A
,
class
B
>
...
@@ -104,62 +131,160 @@ struct slice
...
@@ -104,62 +131,160 @@ struct slice
return
new_lens
;
return
new_lens
;
}
}
shape
normalize_compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
/// Get the attributes that are non-empty
std
::
array
<
bool
,
3
>
get_set_attributes
()
const
{
{
check_shapes
{
inputs
,
*
this
,
true
}.
has
(
1
,
3
,
4
);
std
::
array
<
std
::
vector
<
int64_t
>
,
3
>
attrs
=
{
this
->
starts
,
this
->
ends
,
this
->
axes
};
auto
input_shape
=
inputs
[
0
];
std
::
array
<
bool
,
3
>
bool_vec
;
if
(
inputs
.
size
()
==
1
)
std
::
transform
(
attrs
.
cbegin
(),
attrs
.
cend
(),
bool_vec
.
begin
(),
[](
auto
a
)
{
return
not
a
.
empty
();
});
return
bool_vec
;
}
/// Helper function for normalize_compute_shape()
shape
compute_two_or_more
(
std
::
vector
<
shape
>
inputs
)
const
{
auto
input_shape
=
inputs
[
0
];
auto
set_attributes
=
get_set_attributes
();
// check that inputs [1, end) are all 1D, have the same
// dimension, and are static
check_shapes
{
inputs
.
begin
()
+
1
,
inputs
.
end
(),
std
::
string
(
"SLICE: inputs (starts, ends, and input_axes)"
),
false
}
.
only_dims
(
1
)
.
same_dims
();
auto
dds
=
input_shape
.
to_dynamic
().
dyn_dims
();
if
(
inputs
.
size
()
==
2
)
{
{
auto
t
=
input_shape
.
type
();
if
(
set_attributes
==
ends_axes
)
if
(
input_shape
.
dynamic
()
and
std
::
any_of
(
axes
.
begin
(),
axes
.
end
(),
[
&
](
auto
axis
)
{
return
not
input_shape
.
dyn_dims
()[
axis
].
is_fixed
();
}))
{
{
MIGRAPHX_THROW
(
"SLICE: slicing is not allowed on non-fixed dynamic input axis "
);
// attr ends and axes set; inputs are (data, input_starts)
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
axes
.
size
())
{
MIGRAPHX_THROW
(
"SLICE: 2 input and attributes mismatch"
);
}
std
::
for_each
(
axes
.
cbegin
(),
axes
.
cend
(),
[
&
](
const
auto
&
axis
)
{
dds
.
at
(
axis
)
=
{
0
,
dds
.
at
(
axis
).
max
};
});
}
}
if
(
input_shape
.
dynamic
()
)
else
if
(
set_attributes
==
starts_axes
)
{
{
return
shape
{
t
,
// attr starts and axes set; inputs are (data, input_ends)
lens_calc
(
input_shape
.
min_lens
(),
starts
,
ends
,
axes
),
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
axes
.
size
())
lens_calc
(
input_shape
.
max_lens
(),
starts
,
ends
,
axes
),
{
{}};
MIGRAPHX_THROW
(
"SLICE: 2 input and attributes mismatch"
);
}
std
::
for_each
(
axes
.
cbegin
(),
axes
.
cend
(),
[
&
](
const
auto
&
axis
)
{
dds
.
at
(
axis
)
=
{
0
,
dds
.
at
(
axis
).
max
};
});
}
else
if
(
set_attributes
==
starts_ends
)
{
// attr starts and ends set; inputs are (data, input_axes)
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
starts
.
size
())
{
MIGRAPHX_THROW
(
"SLICE: 2 input and attributes mismatch"
);
}
std
::
transform
(
dds
.
begin
(),
dds
.
end
(),
dds
.
begin
(),
[](
auto
dd
)
{
return
shape
::
dynamic_dimension
{
0
,
dd
.
max
};
});
}
}
else
else
{
{
return
shape
{
MIGRAPHX_THROW
(
"SLICE: Invalid 2 input and attributes configuration"
);
t
,
lens_calc
(
input_shape
.
lens
(),
starts
,
ends
,
axes
),
input_shape
.
strides
()};
}
}
}
}
else
else
if
(
inputs
.
size
()
==
3
)
{
{
// check that starts, ends, and optionally input_axes are all 1D, have the same
if
(
set_attributes
==
axes_only
)
// dimension, and are static
check_shapes
{
inputs
.
begin
()
+
1
,
inputs
.
end
(),
std
::
string
(
"SLICE: inputs (starts, ends, and input_axes)"
),
false
}
.
only_dims
(
1
)
.
same_dims
();
auto
dds
=
input_shape
.
to_dynamic
().
dyn_dims
();
if
(
inputs
.
size
()
==
3
)
{
{
// attr axes set; inputs are (data, input_starts, input_ends)
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
axes
.
size
())
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
axes
.
size
())
{
{
MIGRAPHX_THROW
(
"SLICE: inputs starts and ends do not have the same dimension "
MIGRAPHX_THROW
(
"SLICE: 3 input and attributes mismatch"
);
"as the axes attribute"
);
}
}
std
::
for_each
(
axes
.
cbegin
(),
axes
.
cend
(),
[
&
](
const
auto
&
axis
)
{
std
::
for_each
(
axes
.
cbegin
(),
axes
.
cend
(),
[
&
](
const
auto
&
axis
)
{
dds
.
at
(
axis
)
=
{
0
,
dds
.
at
(
axis
).
max
};
dds
.
at
(
axis
)
=
{
0
,
dds
.
at
(
axis
).
max
};
});
});
}
}
else
else
if
(
set_attributes
==
ends_only
)
{
// attr ends set; inputs are (data, input_starts, input_axes)
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
ends
.
size
())
{
MIGRAPHX_THROW
(
"SLICE: 3 input and attributes mismatch"
);
}
std
::
transform
(
dds
.
begin
(),
dds
.
end
(),
dds
.
begin
(),
[](
auto
dd
)
{
return
shape
::
dynamic_dimension
{
0
,
dd
.
max
};
});
}
else
if
(
set_attributes
==
starts_only
)
{
{
// if axes is an input, then all the output dimensions could be 0 to the max value
// attr starts set; inputs are (data, input_ends, input_axes)
if
(
inputs
[
1
].
lens
().
at
(
0
)
!=
starts
.
size
())
{
MIGRAPHX_THROW
(
"SLICE: 3 input and attributes mismatch"
);
}
std
::
transform
(
dds
.
begin
(),
dds
.
end
(),
dds
.
begin
(),
[](
auto
dd
)
{
std
::
transform
(
dds
.
begin
(),
dds
.
end
(),
dds
.
begin
(),
[](
auto
dd
)
{
return
shape
::
dynamic_dimension
{
0
,
dd
.
max
};
return
shape
::
dynamic_dimension
{
0
,
dd
.
max
};
});
});
}
}
return
shape
{
input_shape
.
type
(),
dds
};
else
{
MIGRAPHX_THROW
(
"Invalid 3 input and attributes configuration"
);
}
}
else
{
// all 4 inputs (data, inputs_starts, input_ends, input_axes)
std
::
transform
(
dds
.
begin
(),
dds
.
end
(),
dds
.
begin
(),
[](
auto
dd
)
{
return
shape
::
dynamic_dimension
{
0
,
dd
.
max
};
});
}
return
shape
{
input_shape
.
type
(),
dds
};
}
// uses the normalize_axes flag to normalize axes, starts, and ends
shape
normalize_compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{
inputs
,
*
this
,
true
}.
has
(
1
,
2
,
3
,
4
);
if
(
inputs
.
size
()
==
1
)
{
auto
input_shape
=
inputs
[
0
];
auto
set_attributes
=
get_set_attributes
();
if
(
set_attributes
!=
all_set
)
{
MIGRAPHX_THROW
(
"SLICE 1_arg: Invalid 1 input and attributes configuration"
);
}
// NOTE: make sure to update how normalization works here if this type of slicing is
// changed to be allowed
if
(
input_shape
.
dynamic
()
and
std
::
any_of
(
axes
.
begin
(),
axes
.
end
(),
[
&
](
auto
axis
)
{
return
not
input_shape
.
dyn_dims
()[
axis
].
is_fixed
();
}))
{
MIGRAPHX_THROW
(
"SLICE 1_arg: slicing is not allowed on non-fixed dynamic input axis "
);
}
if
(
input_shape
.
dynamic
())
{
return
shape
{
input_shape
.
type
(),
lens_calc
(
input_shape
.
min_lens
(),
this
->
starts
,
this
->
ends
,
this
->
axes
),
lens_calc
(
input_shape
.
max_lens
(),
this
->
starts
,
this
->
ends
,
this
->
axes
),
{}};
}
else
{
return
shape
{
input_shape
.
type
(),
lens_calc
(
input_shape
.
lens
(),
this
->
starts
,
this
->
ends
,
this
->
axes
),
input_shape
.
strides
()};
}
}
else
{
return
compute_two_or_more
(
inputs
);
}
}
}
}
...
@@ -194,14 +319,14 @@ struct slice
...
@@ -194,14 +319,14 @@ struct slice
/**
/**
* Calculates the starting offset for the sliced tensor (for aliasing).
* Calculates the starting offset for the sliced tensor (for aliasing).
* Used
when the starts and/or the axes are inputs
.
* Used
for 2-4 inputs to `slice
.
*
*
* \param s static input shape
* \param s static input shape
* \param input_starts starting indices of slice
* \param input_starts starting indices of slice
* \param ax_vec axes to slice on
* \param ax_vec axes to slice on
*/
*/
template
<
class
IndView
,
class
Axes
>
template
<
class
T
>
auto
compute_offset
(
const
shape
&
s
,
const
IndView
&
input_starts
,
const
Axes
&
ax_vec
)
const
auto
compute_offset
(
const
shape
&
s
,
const
T
&
input_starts
,
const
T
&
ax_vec
)
const
{
{
auto
ret
=
0
;
auto
ret
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
ax_vec
.
size
();
++
i
)
for
(
std
::
size_t
i
=
0
;
i
<
ax_vec
.
size
();
++
i
)
...
@@ -212,106 +337,168 @@ struct slice
...
@@ -212,106 +337,168 @@ struct slice
return
ret
*
s
.
type_size
();
return
ret
*
s
.
type_size
();
}
}
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
normalize_inputs
(
const
shape
&
input_shape
,
const
std
::
vector
<
int64_t
>&
input_starts
,
const
std
::
vector
<
int64_t
>&
input_ends
)
const
{
auto
attrs
=
this
->
attributes
().
at
(
"normalize_axes"
);
return
{{
"input_starts"
,
normalize_indices
(
input_starts
,
this
->
axes
,
input_shape
,
attrs
.
at
(
"starts"
),
"Slice variable input_starts"
)},
{
"input_ends"
,
normalize_indices
(
input_ends
,
this
->
axes
,
input_shape
,
attrs
.
at
(
"ends"
),
"Slice variable input_ends"
)}};
}
/**
/**
* Three input version of the normalize_inputs.
* If given, normalize the inputs. Otherwise get from operator attributes.
* This one also checks that the input_axes are valid.
* Return the values in a map.
*
* Parameters
* input_shape: static shape of the input
* input_starts: optional
* input_ends: optional
* input_ends: optional
*/
*/
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
normalize_
input
s
(
shape
input_shape
,
normalize_
starts_ends_axe
s
(
shape
input_shape
,
const
std
::
vector
<
int64_t
>&
input_starts
,
const
optional
<
std
::
vector
<
int64_t
>
>
&
input_starts
,
const
std
::
vector
<
int64_t
>&
input_ends
,
const
optional
<
std
::
vector
<
int64_t
>
>
&
input_ends
,
const
std
::
vector
<
int64_t
>&
input_axes
)
const
const
optional
<
std
::
vector
<
int64_t
>
>
&
input_axes
)
const
{
{
auto
attrs
=
this
->
attributes
().
at
(
"normalize_axes"
);
auto
axes_attrs
=
this
->
attributes
().
at
(
"normalize_axes"
);
auto
norm_axes
=
std
::
vector
<
int64_t
>
norm_starts
;
normalize_axes
(
input_axes
,
input_shape
,
attrs
.
at
(
"axes"
),
"Slice variable input_axes"
);
std
::
vector
<
int64_t
>
norm_ends
;
return
{{
"input_starts"
,
std
::
vector
<
int64_t
>
norm_axes
;
normalize_indices
(
input_starts
,
if
(
input_axes
)
norm_axes
,
{
input_shape
,
norm_axes
=
normalize_axes
(
input_axes
.
value
(),
attrs
.
at
(
"starts"
),
input_shape
,
"Slice variable input_starts"
)},
axes_attrs
.
at
(
"axes"
),
{
"input_ends"
,
"Slice variable input_axes"
);
normalize_indices
(
input_ends
,
}
norm_axes
,
else
input_shape
,
{
attrs
.
at
(
"ends"
),
norm_axes
=
this
->
axes
;
"Slice variable input ends"
)},
}
{
"input_axes"
,
norm_axes
}};
if
(
input_starts
)
{
norm_starts
=
normalize_indices
(
input_starts
.
value
(),
norm_axes
,
input_shape
,
axes_attrs
.
at
(
"starts"
),
"Slice variable input_starts"
);
}
else
{
norm_starts
=
this
->
starts
;
}
if
(
input_ends
)
{
norm_ends
=
normalize_indices
(
input_ends
.
value
(),
norm_axes
,
input_shape
,
axes_attrs
.
at
(
"ends"
),
"Slice variable input ends"
);
}
else
{
norm_ends
=
this
->
ends
;
}
return
{{
"norm_starts"
,
norm_starts
},
{
"norm_ends"
,
norm_ends
},
{
"norm_axes"
,
norm_axes
}};
}
}
argument
compute
(
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
{
auto
input
=
args
[
0
];
auto
input
=
args
[
0
];
auto
input_shape
=
input
.
get_shape
();
auto
input_shape
=
input
.
get_shape
();
switch
(
args
.
size
())
if
(
args
.
size
()
==
1
)
{
{
case
1
:
{
std
::
size_t
offset
=
compute_offset
(
input_shape
);
std
::
size_t
offset
=
compute_offset
(
input_shape
);
return
{
dyn_out
.
computed_shape
,
[
=
]
{
return
input
.
data
()
+
offset
;
}};
return
{
dyn_out
.
computed_shape
,
[
=
]
{
return
input
.
data
()
+
offset
;
}};
}
}
case
3
:
{
else
shape
calc_shape
;
{
std
::
size_t
offset
=
0
;
// Note that we re-normalize both the attributes and inputs because of the non-fixed
visit_all
(
args
[
1
],
args
[
2
])([
&
](
auto
input_starts
,
auto
input_ends
)
{
// dynamic input shape case. It's possible to only re-normalize if slicing over
auto
norm_inputs
=
normalize_inputs
(
input_shape
,
// non-fixed dynamic_dimensions.
input_starts
.
template
to_vector
<
int64_t
>(),
auto
set_attributes
=
get_set_attributes
();
input_ends
.
template
to_vector
<
int64_t
>());
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
norm_inputs
;
offset
=
compute_offset
(
input_shape
,
norm_inputs
.
at
(
"input_starts"
),
this
->
axes
);
if
(
set_attributes
==
ends_axes
)
calc_shape
=
{
input_shape
.
type
(),
{
lens_calc
(
input_shape
.
lens
(),
// attr ends and axes set; inputs are (data, input_starts)
norm_inputs
.
at
(
"input_starts"
),
args
[
1
].
visit
([
&
](
auto
input_starts
)
{
norm_inputs
.
at
(
"input_ends"
),
norm_inputs
=
this
->
axes
),
normalize_starts_ends_axes
(
input_shape
,
input_shape
.
strides
()};
input_starts
.
template
to_vector
<
int64_t
>(),
});
this
->
ends
,
return
{
calc_shape
,
[
=
]
{
return
input
.
data
()
+
offset
;
}};
this
->
axes
);
}
});
case
4
:
{
}
shape
calc_shape
;
else
if
(
set_attributes
==
starts_axes
)
std
::
size_t
offset
=
0
;
{
visit_all
(
args
[
1
],
args
[
2
],
args
[
3
])(
// attr starts and axes set; inputs are (data, input_ends)
[
&
](
auto
input_starts
,
auto
input_ends
,
auto
input_axes
)
{
args
[
1
].
visit
([
&
](
auto
input_ends
)
{
auto
norm_inputs
=
normalize_inputs
(
input_shape
,
norm_inputs
=
input_starts
.
template
to_vector
<
int64_t
>(),
normalize_starts_ends_axes
(
input_shape
,
input_ends
.
template
to_vector
<
int64_t
>(),
this
->
starts
,
input_axes
.
template
to_vector
<
int64_t
>());
input_ends
.
template
to_vector
<
int64_t
>(),
offset
=
compute_offset
(
this
->
axes
);
input_shape
,
norm_inputs
.
at
(
"input_starts"
),
norm_inputs
.
at
(
"input_axes"
));
});
calc_shape
=
shape
{
input_shape
.
type
(),
}
lens_calc
(
input_shape
.
lens
(),
else
if
(
set_attributes
==
starts_ends
)
norm_inputs
.
at
(
"input_starts"
),
{
norm_inputs
.
at
(
"input_ends"
),
// attr starts and ends set; inputs are (data, input_axes)
norm_inputs
.
at
(
"input_axes"
)),
args
[
1
].
visit
([
&
](
auto
input_axes
)
{
input_shape
.
strides
()};
norm_inputs
=
normalize_starts_ends_axes
(
input_shape
,
this
->
starts
,
this
->
ends
,
input_axes
.
template
to_vector
<
int64_t
>());
});
});
}
else
if
(
set_attributes
==
axes_only
)
{
// attr axes set; inputs are (data, input_starts, input_ends)
visit_all
(
args
[
1
],
args
[
2
])([
&
](
auto
input_starts
,
auto
input_ends
)
{
norm_inputs
=
normalize_starts_ends_axes
(
input_shape
,
input_starts
.
template
to_vector
<
int64_t
>(),
input_ends
.
template
to_vector
<
int64_t
>(),
this
->
axes
);
});
}
else
if
(
set_attributes
==
ends_only
)
{
// attr ends set; inputs are (data, input_starts, input_axes)
visit_all
(
args
[
1
],
args
[
2
])([
&
](
auto
input_starts
,
auto
input_axes
)
{
norm_inputs
=
normalize_starts_ends_axes
(
input_shape
,
input_starts
.
template
to_vector
<
int64_t
>(),
this
->
ends
,
input_axes
.
template
to_vector
<
int64_t
>());
});
}
else
if
(
set_attributes
==
starts_only
)
{
// attr starts set; inputs are (data, input_ends, input_axes)
visit_all
(
args
[
1
],
args
[
2
])([
&
](
auto
input_ends
,
auto
input_axes
)
{
norm_inputs
=
normalize_starts_ends_axes
(
input_shape
,
this
->
starts
,
input_ends
.
template
to_vector
<
int64_t
>(),
input_axes
.
template
to_vector
<
int64_t
>());
});
}
else
{
// no attr set, all inputs
visit_all
(
args
[
1
],
args
[
2
],
args
[
3
])(
[
&
](
auto
input_starts
,
auto
input_ends
,
auto
input_axes
)
{
norm_inputs
=
normalize_starts_ends_axes
(
input_shape
,
input_starts
.
template
to_vector
<
int64_t
>(),
input_ends
.
template
to_vector
<
int64_t
>(),
input_axes
.
template
to_vector
<
int64_t
>());
});
}
auto
offset
=
compute_offset
(
input_shape
,
norm_inputs
.
at
(
"norm_starts"
),
norm_inputs
.
at
(
"norm_axes"
));
shape
calc_shape
=
shape
{
input_shape
.
type
(),
lens_calc
(
input_shape
.
lens
(),
norm_inputs
.
at
(
"norm_starts"
),
norm_inputs
.
at
(
"norm_ends"
),
norm_inputs
.
at
(
"norm_axes"
)),
input_shape
.
strides
()};
return
{
calc_shape
,
[
=
]
{
return
input
.
data
()
+
offset
;
}};
return
{
calc_shape
,
[
=
]
{
return
input
.
data
()
+
offset
;
}};
}
}
default:
{
// Should never get here; covering in case some code change occurs
MIGRAPHX_THROW
(
"SLICE: invalid number of inputs"
);
}
}
}
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
)
const
{
return
0
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
)
const
{
return
0
;
}
...
...
src/include/migraphx/op/unary.hpp
View file @
a24ed87e
...
@@ -31,6 +31,7 @@
...
@@ -31,6 +31,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp>
#include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -84,10 +85,10 @@ struct unary : op_name<Derived>
...
@@ -84,10 +85,10 @@ struct unary : op_name<Derived>
argument
result
{
dyn_out
.
computed_shape
};
argument
result
{
dyn_out
.
computed_shape
};
result
.
visit
([
&
](
auto
output
)
{
result
.
visit
([
&
](
auto
output
)
{
args
[
0
].
visit
([
&
](
auto
input
)
{
args
[
0
].
visit
([
&
](
auto
input
)
{
std
::
transform
(
input
.
begin
(),
par_
transform
(
input
.
begin
(),
input
.
end
(),
input
.
end
(),
output
.
begin
(),
output
.
begin
(),
static_cast
<
const
Derived
&>
(
*
this
).
apply
());
static_cast
<
const
Derived
&>
(
*
this
).
apply
());
});
});
});
});
return
result
;
return
result
;
...
...
src/include/migraphx/op/unique.hpp
0 → 100644
View file @
a24ed87e
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_UNIQUE_HPP
#define MIGRAPHX_GUARD_OPERATORS_UNIQUE_HPP
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/tune_axis.hpp>
#include <utility>
#include <map>
#include <limits>
#include <optional>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
// https://onnx.ai/onnx/operators/onnx__Unique.html
// The Onnx spec refers to numpy specification, used as a reference:
// https://numpy.org/doc/stable/reference/generated/numpy.unique.html
// Input : Given an array of elements : X.
// Output(s) :
// 1. Find the unique elements (Y) of input (X).
//
// There are three outputs in addition to the unique elements in Y:
// 2. the indices of the input array that give the unique values
// 3. the indices of the unique array that reconstruct the input array
// 4. the number of times each unique value comes up in the input array
// Optional Attribute: 'Sorted' = 1 for sorted; = 0 for unsorted.
// Onnx specification makes 'sorted' a default, while Numpy always sorts.
//
// Optional Attribute: 'Axis' is 'None' (default) or a valid int < rank(X).
// Negative values are allowed.
//
// Numpy has the following important note on Axis:
// ------------------------------------------------------------------
// When an axis is specified the subarrays indexed by the axis are
// sorted. This is done by making the specified axis the first
// dimension of the array (move the axis to the first dimension to
// keep the order of the other axes) and then flattening the subarrays
// in C order. The flattened subarrays are then viewed as a structured
// type with each element given a label, with the effect that we end
// up with a 1-D array of structured types that can be treated in the
// same way as any other 1-D array. The result is that the flattened
// subarrays are sorted in lexicographic order starting with the first
// element.
// ------------------------------------------------------------------
struct
unique
{
template
<
class
T
>
auto
make_idx_less_fn
(
const
T
&
data
,
size_t
chunk_sz
)
const
{
return
[
&
data
,
chunk_sz
](
auto
idx1
,
auto
idx2
)
{
return
std
::
lexicographical_compare
(
data
.
begin
()
+
idx1
,
data
.
begin
()
+
idx1
+
chunk_sz
,
data
.
begin
()
+
idx2
,
data
.
begin
()
+
idx2
+
chunk_sz
);
};
}
// CASE SORTED:
//
// To process into a sorted unique series of elements/chunks:
// Chunk size == 1 means a simple element; >1 means a flat representation.
// Steps: first go through the input elements/chunks for uniqueness.
// At the end of this processing, per the sorted sequence of unique elements:
// update/create data structures: y, y_indices, x_rev_indices, y_count
//
// INPUT x: [2, 1, 1, 3, 4, 3], attr_sorted = 1;
// OUTPUT(s): indices..
// y_indices: [1, 0, 3, 4] --- first incidence, in terms of index in sequence x
// x_rev_indices: [1, 0, 0, 2, 3, 2] --- x seen in terms of indices of unique sequence y
// y_count: [2, 1, 2, 1] -- count at each y_index. sum = len(x)
// NOTE: y [1, 2, 3, 4] --- the unique output is constructed from x[y_indices[...]]
template
<
class
T
>
auto
sorted_uniq_indices
(
const
T
&
input_data
,
size_t
chunk_sz
)
const
{
struct
y_info
{
size_t
y_idx
;
size_t
x_idx
;
size_t
ct
=
0
;
};
auto
idx_less_fn
=
make_idx_less_fn
(
input_data
,
chunk_sz
);
std
::
map
<
size_t
,
y_info
,
decltype
(
idx_less_fn
)
>
uniq_val_map
(
idx_less_fn
);
std
::
tuple
<
std
::
vector
<
std
::
size_t
>
,
std
::
vector
<
std
::
size_t
>
,
std
::
vector
<
std
::
size_t
>>
rv
;
auto
&
[
y_indices
,
x_rev_indices
,
y_count
]
=
rv
;
// go through all the elements and find the unique elements..
size_t
count_x
=
input_data
.
size
();
for
(
size_t
f_idx
=
0
,
x_idx
=
0
;
f_idx
<
count_x
;
f_idx
+=
chunk_sz
,
x_idx
++
)
{
y_info
entry
=
{.
y_idx
=
uniq_val_map
.
size
(),
.
x_idx
=
x_idx
};
auto
[
itr
,
added_new
]
=
uniq_val_map
.
insert
({
f_idx
,
entry
});
itr
->
second
.
ct
++
;
x_rev_indices
.
push_back
(
itr
->
second
.
y_idx
);
}
std
::
vector
<
std
::
size_t
>
y2x_indices
(
uniq_val_map
.
size
());
y_indices
.
resize
(
uniq_val_map
.
size
());
y_count
.
resize
(
uniq_val_map
.
size
());
size_t
idx
=
0
;
// the unique elements are now sorted:
// post-processing for all the return indices.
for
(
const
auto
&
v
:
uniq_val_map
)
{
y2x_indices
[
v
.
second
.
y_idx
]
=
idx
;
y_indices
[
idx
]
=
v
.
second
.
x_idx
;
y_count
[
idx
]
=
v
.
second
.
ct
;
idx
++
;
}
// update x_rev_indices as per the sorted order of y_indices
for
(
auto
&
i
:
x_rev_indices
)
i
=
y2x_indices
[
i
];
return
rv
;
}
// CASE UNSORTED:
//
// To process into an un-sorted unique series of elements/chunks:
// For chunk size = 1 is a simple element, else use a flat representation of a tensor obj
// Go through the input elements/chunks one by one with inline processing of indices..
// INPUT x: [2, 1, 1, 3, 4, 3], attr_sorted = 0;
// OUTPUT(s): indices..
// y_indices: [0, 1, 3, 4] --- first incidence, in terms of index in sequence x
// x_rev_indices: [0, 1, 1, 2, 3, 2] --- x seen in terms of indices of unique sequence y
// y_count: [1, 2, 2, 1] -- count at each y_index. sum = len(x)
// NOTE: y [2, 1, 3, 4] --- the unique output is constructed from x[y_indices[...]]
// Output data structures: y_indices, x_rev_indices, y_count are processed inline.
template
<
class
T
>
auto
unsorted_uniq_indices
(
const
T
&
input_data
,
size_t
chunk_sz
)
const
{
auto
idx_less_fn
=
make_idx_less_fn
(
input_data
,
chunk_sz
);
std
::
map
<
size_t
,
size_t
,
decltype
(
idx_less_fn
)
>
uniq_val_map
(
idx_less_fn
);
// rv is used for NVRO below..
std
::
tuple
<
std
::
vector
<
std
::
size_t
>
,
std
::
vector
<
std
::
size_t
>
,
std
::
vector
<
std
::
size_t
>>
rv
;
auto
&
[
y_indices
,
x_rev_indices
,
y_count
]
=
rv
;
// go through all the elements and add the unique elements into the map..
// inline processing for outputs: y_indices, x_rev_indices, y_count
size_t
count_x
=
input_data
.
size
();
for
(
size_t
f_idx
=
0
;
f_idx
<
count_x
;
f_idx
+=
chunk_sz
)
{
auto
[
itr
,
added_new
]
=
uniq_val_map
.
insert
({
f_idx
,
y_indices
.
size
()});
if
(
added_new
)
{
y_count
.
push_back
(
0
);
y_indices
.
push_back
(
x_rev_indices
.
size
());
}
y_count
[
itr
->
second
]
++
;
x_rev_indices
.
push_back
(
itr
->
second
);
}
return
rv
;
}
// Axis. Default: none. Range: [-rank, rank-1]
std
::
optional
<
int64_t
>
axis
;
// Sorted, Default: 1= sorted. 0 = unsorted.
bool
sorted
=
true
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
axis
,
"axis"
),
f
(
self
.
sorted
,
"sorted"
));
}
std
::
string
name
()
const
{
return
"unique"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
auto
&
sh_x
=
inputs
[
0
];
auto
lens_x
=
sh_x
.
lens
();
size_t
dim_x
=
sh_x
.
ndim
();
size_t
max_uniq_ct
=
sh_x
.
elements
();
std
::
vector
<
shape
::
dynamic_dimension
>
d_out
;
if
(
axis
)
{
int64_t
t_axis
=
migraphx
::
tune_axis
(
dim_x
,
*
axis
,
name
());
if
(
t_axis
!=
0
)
MIGRAPHX_THROW
(
"Unique: Only supports axis = 0 or None"
);
d_out
=
sh_x
.
to_dynamic
().
dyn_dims
();
// only axis = 0 is supported:
max_uniq_ct
=
lens_x
[
0
];
// min = 1 unique element; max = full dimension along axis 0
d_out
[
0
]
=
{
1
,
max_uniq_ct
};
}
else
{
d_out
.
push_back
({
1
,
max_uniq_ct
});
}
shape
sh_y
=
{
sh_x
.
type
(),
d_out
};
// The three outputted Indices are just 1-D:
shape
sh_idx
{
shape
::
int64_type
,
{
d_out
[
0
]}};
return
{{
sh_y
,
sh_idx
,
sh_idx
,
sh_idx
}};
}
argument
compute
(
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
auto
sh_x
=
args
.
front
().
get_shape
();
auto
lens_x
=
sh_x
.
lens
();
shape
output_shape
=
dyn_out
.
computed_shape
;
auto
vec_ss
=
output_shape
.
sub_shapes
();
auto
ct_x
=
sh_x
.
elements
();
shape
sh_y
=
{
vec_ss
[
0
].
type
(),
{
ct_x
}};
shape
sh_idx
=
{
vec_ss
[
1
].
type
(),
{
ct_x
}};
shape
sh_x_idx
=
{
vec_ss
[
1
].
type
(),
{
ct_x
}};
argument
res_y
{
sh_y
};
argument
res_y_idx
{
sh_idx
};
argument
res_x_rev_idx
{
sh_idx
};
argument
res_y_ct_idx
{
sh_idx
};
std
::
vector
<
size_t
>
out_y_idx
;
std
::
vector
<
size_t
>
out_x_rev_idx
;
std
::
vector
<
size_t
>
out_y_ct
;
// If axis is not none, for >1D tensors, we have to consider
// then, the uniqueness of chunks of sub-tensors: a subsequence of built-ins..
// For a built-in type, chunk_sz is of course = 1
size_t
chunk_sz
=
1
;
if
(
axis
)
chunk_sz
=
ct_x
/
lens_x
[
0
];
// axis = 0 is supported.
visit_all
(
args
.
front
(),
res_y
)([
&
](
auto
x
,
auto
y_flat
)
{
using
o_type
=
typename
decltype
(
x
)
::
value_type
;
std
::
vector
<
o_type
>
x_in
(
x
.
begin
(),
x
.
end
());
std
::
tie
(
out_y_idx
,
out_x_rev_idx
,
out_y_ct
)
=
sorted
?
sorted_uniq_indices
(
x_in
,
chunk_sz
)
:
unsorted_uniq_indices
(
x_in
,
chunk_sz
);
const
auto
uniq_ct
=
out_y_idx
.
size
();
// construct y from x[indices] in flattened form
// later we reshape y to the final shape..
auto
y_dst
=
y_flat
.
begin
();
for
(
size_t
idx
=
0
;
idx
<
uniq_ct
;
idx
++
)
y_dst
=
copy_n
(
x_in
.
begin
()
+
out_y_idx
[
idx
]
*
chunk_sz
,
chunk_sz
,
y_dst
);
std
::
vector
<
size_t
>
lens_y
;
// if axis is specified:
// the output shape keeps the n-1 dimensions of x
if
(
axis
)
{
lens_y
=
lens_x
;
lens_y
[
0
]
=
uniq_ct
;
}
else
{
lens_y
=
{
uniq_ct
};
}
sh_y
=
{
sh_y
.
type
(),
lens_y
};
sh_idx
=
{
sh_idx
.
type
(),
{
uniq_ct
}};
});
visit_all
(
res_y_idx
,
res_x_rev_idx
,
res_y_ct_idx
)(
[
&
](
auto
y_indices
,
auto
x_rev_indices
,
auto
y_count
)
{
std
::
copy
(
out_y_idx
.
begin
(),
out_y_idx
.
end
(),
y_indices
.
begin
());
std
::
copy
(
out_x_rev_idx
.
begin
(),
out_x_rev_idx
.
end
(),
x_rev_indices
.
begin
());
std
::
copy
(
out_y_ct
.
begin
(),
out_y_ct
.
end
(),
y_count
.
begin
());
sh_x_idx
=
{
sh_idx
.
type
(),
{
out_x_rev_idx
.
size
()}};
});
return
{{
res_y
.
reshape
(
sh_y
),
res_y_idx
.
reshape
(
sh_idx
),
res_x_rev_idx
.
reshape
(
sh_x_idx
),
res_y_ct_idx
.
reshape
(
sh_idx
)}};
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/operators.hpp
View file @
a24ed87e
...
@@ -84,6 +84,7 @@
...
@@ -84,6 +84,7 @@
#include <migraphx/op/mod.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/op/nearbyint.hpp>
#include <migraphx/op/neg.hpp>
#include <migraphx/op/neg.hpp>
#include <migraphx/op/nonmaxsuppression.hpp>
#include <migraphx/op/nonmaxsuppression.hpp>
#include <migraphx/op/nonzero.hpp>
#include <migraphx/op/nonzero.hpp>
...
@@ -110,7 +111,6 @@
...
@@ -110,7 +111,6 @@
#include <migraphx/op/rnn_variable_seq_lens.hpp>
#include <migraphx/op/rnn_variable_seq_lens.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/roialign.hpp>
#include <migraphx/op/roialign.hpp>
#include <migraphx/op/round.hpp>
#include <migraphx/op/rsqrt.hpp>
#include <migraphx/op/rsqrt.hpp>
#include <migraphx/op/scalar.hpp>
#include <migraphx/op/scalar.hpp>
#include <migraphx/op/scatter_add.hpp>
#include <migraphx/op/scatter_add.hpp>
...
@@ -119,6 +119,8 @@
...
@@ -119,6 +119,8 @@
#include <migraphx/op/scatternd_add.hpp>
#include <migraphx/op/scatternd_add.hpp>
#include <migraphx/op/scatternd_none.hpp>
#include <migraphx/op/scatternd_none.hpp>
#include <migraphx/op/scatternd_mul.hpp>
#include <migraphx/op/scatternd_mul.hpp>
#include <migraphx/op/scatternd_max.hpp>
#include <migraphx/op/scatternd_min.hpp>
#include <migraphx/op/sigmoid.hpp>
#include <migraphx/op/sigmoid.hpp>
#include <migraphx/op/sign.hpp>
#include <migraphx/op/sign.hpp>
#include <migraphx/op/sinh.hpp>
#include <migraphx/op/sinh.hpp>
...
@@ -137,6 +139,7 @@
...
@@ -137,6 +139,7 @@
#include <migraphx/op/unary.hpp>
#include <migraphx/op/unary.hpp>
#include <migraphx/op/unary_not.hpp>
#include <migraphx/op/unary_not.hpp>
#include <migraphx/op/undefined.hpp>
#include <migraphx/op/undefined.hpp>
#include <migraphx/op/unique.hpp>
#include <migraphx/op/unknown.hpp>
#include <migraphx/op/unknown.hpp>
#include <migraphx/op/unsqueeze.hpp>
#include <migraphx/op/unsqueeze.hpp>
#include <migraphx/op/where.hpp>
#include <migraphx/op/where.hpp>
...
...
src/include/migraphx/par.hpp
0 → 100644
View file @
a24ed87e
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_MIGRAPHX_PAR_HPP
#define MIGRAPHX_GUARD_MIGRAPHX_PAR_HPP
#include <migraphx/config.hpp>
#if MIGRAPHX_HAS_EXECUTORS
#include <execution>
#else
#include <migraphx/simple_par_for.hpp>
#endif
#include <algorithm>
#include <mutex>
#include <vector>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
detail
{
struct
exception_list
{
std
::
vector
<
std
::
exception_ptr
>
exceptions
;
std
::
mutex
m
;
void
add_exception
()
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
m
);
exceptions
.
push_back
(
std
::
current_exception
());
}
template
<
class
F
>
auto
collect
(
F
f
)
{
return
[
f
,
this
](
auto
&&
...
xs
)
{
try
{
f
(
std
::
forward
<
decltype
(
xs
)
>
(
xs
)...);
}
catch
(...)
{
this
->
add_exception
();
}
};
}
void
throw_if_exception
()
const
{
if
(
not
exceptions
.
empty
())
std
::
rethrow_exception
(
exceptions
.
front
());
}
};
}
// namespace detail
template
<
class
InputIt
,
class
OutputIt
,
class
UnaryOperation
>
OutputIt
par_transform
(
InputIt
first1
,
InputIt
last1
,
OutputIt
d_first
,
UnaryOperation
unary_op
)
{
#if MIGRAPHX_HAS_EXECUTORS
return
std
::
transform
(
std
::
execution
::
par
,
first1
,
last1
,
d_first
,
std
::
move
(
unary_op
));
#else
simple_par_for
(
last1
-
first1
,
[
&
](
auto
i
)
{
d_first
[
i
]
=
unary_op
(
first1
[
i
]);
});
return
d_first
+
(
last1
-
first1
);
#endif
}
template
<
class
InputIt1
,
class
InputIt2
,
class
OutputIt
,
class
BinaryOperation
>
OutputIt
par_transform
(
InputIt1
first1
,
InputIt1
last1
,
InputIt2
first2
,
OutputIt
d_first
,
BinaryOperation
binary_op
)
{
#if MIGRAPHX_HAS_EXECUTORS
return
std
::
transform
(
std
::
execution
::
par
,
first1
,
last1
,
first2
,
d_first
,
std
::
move
(
binary_op
));
#else
simple_par_for
(
last1
-
first1
,
[
&
](
auto
i
)
{
d_first
[
i
]
=
binary_op
(
first1
[
i
],
first2
[
i
]);
});
return
d_first
+
(
last1
-
first1
);
#endif
}
template
<
class
InputIt
,
class
UnaryFunction
>
void
par_for_each
(
InputIt
first
,
InputIt
last
,
UnaryFunction
f
)
{
#if MIGRAPHX_HAS_EXECUTORS
// Propagate the exception
detail
::
exception_list
ex
;
std
::
for_each
(
std
::
execution
::
par
,
first
,
last
,
ex
.
collect
(
std
::
move
(
f
)));
ex
.
throw_if_exception
();
#else
simple_par_for
(
last
-
first
,
[
&
](
auto
i
)
{
f
(
first
[
i
]);
});
#endif
}
template
<
class
...
Ts
>
auto
par_copy_if
(
Ts
&&
...
xs
)
{
#if MIGRAPHX_HAS_EXECUTORS
return
std
::
copy_if
(
std
::
execution
::
par
,
std
::
forward
<
Ts
>
(
xs
)...);
#else
return
std
::
copy_if
(
std
::
forward
<
Ts
>
(
xs
)...);
#endif
}
template
<
class
...
Ts
>
auto
par_sort
(
Ts
&&
...
xs
)
{
#if MIGRAPHX_HAS_EXECUTORS
return
std
::
sort
(
std
::
execution
::
par
,
std
::
forward
<
Ts
>
(
xs
)...);
#else
return
std
::
sort
(
std
::
forward
<
Ts
>
(
xs
)...);
#endif
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_MIGRAPHX_PAR_HPP
src/include/migraphx/par_for.hpp
View file @
a24ed87e
...
@@ -24,93 +24,23 @@
...
@@ -24,93 +24,23 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#include <thread>
#include <migraphx/par.hpp>
#include <cmath>
#include <migraphx/ranges.hpp>
#include <algorithm>
#include <vector>
#include <cassert>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
joinable_thread
:
std
::
thread
{
template
<
class
...
Xs
>
joinable_thread
(
Xs
&&
...
xs
)
:
std
::
thread
(
std
::
forward
<
Xs
>
(
xs
)...)
// NOLINT
{
}
joinable_thread
&
operator
=
(
joinable_thread
&&
other
)
=
default
;
joinable_thread
(
joinable_thread
&&
other
)
=
default
;
~
joinable_thread
()
{
if
(
this
->
joinable
())
this
->
join
();
}
};
template
<
class
F
>
auto
thread_invoke
(
std
::
size_t
i
,
std
::
size_t
tid
,
F
f
)
->
decltype
(
f
(
i
,
tid
))
{
f
(
i
,
tid
);
}
template
<
class
F
>
auto
thread_invoke
(
std
::
size_t
i
,
std
::
size_t
,
F
f
)
->
decltype
(
f
(
i
))
{
f
(
i
);
}
template
<
class
F
>
void
par_for_impl
(
std
::
size_t
n
,
std
::
size_t
threadsize
,
F
f
)
{
if
(
threadsize
<=
1
)
{
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
thread_invoke
(
i
,
0
,
f
);
}
else
{
std
::
vector
<
joinable_thread
>
threads
(
threadsize
);
// Using const here causes gcc 5 to ICE
#if(!defined(__GNUC__) || __GNUC__ != 5)
const
#endif
std
::
size_t
grainsize
=
std
::
ceil
(
static_cast
<
double
>
(
n
)
/
threads
.
size
());
std
::
size_t
work
=
0
;
std
::
size_t
tid
=
0
;
std
::
generate
(
threads
.
begin
(),
threads
.
end
(),
[
=
,
&
work
,
&
tid
]
{
auto
result
=
joinable_thread
([
=
]
{
std
::
size_t
start
=
work
;
std
::
size_t
last
=
std
::
min
(
n
,
work
+
grainsize
);
for
(
std
::
size_t
i
=
start
;
i
<
last
;
i
++
)
{
thread_invoke
(
i
,
tid
,
f
);
}
});
work
+=
grainsize
;
++
tid
;
return
result
;
});
assert
(
work
>=
n
);
}
}
template
<
class
F
>
template
<
class
F
>
void
par_for
(
std
::
size_t
n
,
std
::
size_t
min_grain
,
F
f
)
void
par_for
(
std
::
size_t
n
,
F
f
)
{
{
const
auto
threadsize
=
std
::
min
<
std
::
size_t
>
(
std
::
thread
::
hardware_concurrency
(),
using
iterator
=
basic_iota_iterator
<
id
,
std
::
size_t
>
;
n
/
std
::
max
<
std
::
size_t
>
(
1
,
min_grain
));
par_for_each
(
iterator
{
0
,
{}},
iterator
{
n
,
{}},
f
);
par_for_impl
(
n
,
threadsize
,
f
);
}
}
template
<
class
F
>
template
<
class
F
>
void
par_for
(
std
::
size_t
n
,
F
f
)
void
par_for
(
std
::
size_t
n
,
std
::
size_t
,
F
f
)
{
{
const
int
min_grain
=
8
;
par_for
(
n
,
f
);
par_for
(
n
,
min_grain
,
f
);
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/include/migraphx/rewrite_pooling.hpp
View file @
a24ed87e
...
@@ -26,6 +26,7 @@
...
@@ -26,6 +26,7 @@
#include <string>
#include <string>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/instruction_ref.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/include/migraphx/shape.hpp
View file @
a24ed87e
...
@@ -34,6 +34,7 @@
...
@@ -34,6 +34,7 @@
#include <migraphx/functional.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/half.hpp>
#include <migraphx/half.hpp>
#include <migraphx/float8.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
...
@@ -60,7 +61,8 @@ struct MIGRAPHX_EXPORT shape
...
@@ -60,7 +61,8 @@ struct MIGRAPHX_EXPORT shape
m(int32_type, int32_t) \
m(int32_type, int32_t) \
m(int64_type, int64_t) \
m(int64_type, int64_t) \
m(uint32_type, uint32_t) \
m(uint32_type, uint32_t) \
m(uint64_type, uint64_t)
m(uint64_type, uint64_t) \
m(fp8e4m3fnuz_type, migraphx::fp8::fp8e4m3fnuz)
// clang-format on
// clang-format on
#define MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES(x, t) x,
#define MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES(x, t) x,
...
...
src/
targets/gpu/
include/migraphx/
gpu/pad
.hpp
→
src/include/migraphx/
simple_par_for
.hpp
View file @
a24ed87e
...
@@ -21,40 +21,98 @@
...
@@ -21,40 +21,98 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_
PAD
_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_
SIMPLE_PAR_FOR
_HPP
#define MIGRAPHX_GUARD_RTGLIB_
PAD
_HPP
#define MIGRAPHX_GUARD_RTGLIB_
SIMPLE_PAR_FOR
_HPP
#include <migraphx/argument.hpp>
#include <thread>
#include <migraphx/reflect.hpp>
#include <cmath>
#include <migraphx/op/pad.hpp>
#include <algorithm>
#include <vector>
#include <cassert>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
joinable_thread
:
std
::
thread
struct
hip_pad
{
{
op
::
pad
op
;
template
<
class
...
Xs
>
joinable_thread
(
Xs
&&
...
xs
)
:
std
::
thread
(
std
::
forward
<
Xs
>
(
xs
)...)
// NOLINT
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
}
std
::
string
name
()
const
{
return
"gpu::pad"
;
}
joinable_thread
&
operator
=
(
joinable_thread
&&
other
)
=
default
;
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
joinable_thread
(
joinable_thread
&&
other
)
=
default
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
~
joinable_thread
()
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
return
shapes
.
size
()
-
1
;
if
(
this
->
joinable
())
this
->
join
();
}
}
};
};
}
// namespace gpu
template
<
class
F
>
auto
thread_invoke
(
std
::
size_t
i
,
std
::
size_t
tid
,
F
f
)
->
decltype
(
f
(
i
,
tid
))
{
f
(
i
,
tid
);
}
template
<
class
F
>
auto
thread_invoke
(
std
::
size_t
i
,
std
::
size_t
,
F
f
)
->
decltype
(
f
(
i
))
{
f
(
i
);
}
template
<
class
F
>
void
simple_par_for_impl
(
std
::
size_t
n
,
std
::
size_t
threadsize
,
F
f
)
{
if
(
threadsize
<=
1
)
{
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
thread_invoke
(
i
,
0
,
f
);
}
else
{
std
::
vector
<
joinable_thread
>
threads
(
threadsize
);
// Using const here causes gcc 5 to ICE
#if(!defined(__GNUC__) || __GNUC__ != 5)
const
#endif
std
::
size_t
grainsize
=
std
::
ceil
(
static_cast
<
double
>
(
n
)
/
threads
.
size
());
std
::
size_t
work
=
0
;
std
::
size_t
tid
=
0
;
std
::
generate
(
threads
.
begin
(),
threads
.
end
(),
[
=
,
&
work
,
&
tid
]
{
auto
result
=
joinable_thread
([
=
]
{
std
::
size_t
start
=
work
;
std
::
size_t
last
=
std
::
min
(
n
,
work
+
grainsize
);
for
(
std
::
size_t
i
=
start
;
i
<
last
;
i
++
)
{
thread_invoke
(
i
,
tid
,
f
);
}
});
work
+=
grainsize
;
++
tid
;
return
result
;
});
assert
(
work
>=
n
);
}
}
template
<
class
F
>
void
simple_par_for
(
std
::
size_t
n
,
std
::
size_t
min_grain
,
F
f
)
{
const
auto
threadsize
=
std
::
min
<
std
::
size_t
>
(
std
::
thread
::
hardware_concurrency
(),
n
/
std
::
max
<
std
::
size_t
>
(
1
,
min_grain
));
simple_par_for_impl
(
n
,
threadsize
,
f
);
}
template
<
class
F
>
void
simple_par_for
(
std
::
size_t
n
,
F
f
)
{
const
int
min_grain
=
8
;
simple_par_for
(
n
,
min_grain
,
f
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
Prev
1
2
3
4
5
6
7
8
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment