Project: gaoqiong / MIGraphX

Commit a8a8d868, authored May 10, 2022 by Paul

    Merge branch 'develop' into jit-softmax

Parents: bb0fff52, 5e5ed37a
Showing 15 changed files with 376 additions and 105 deletions.
src/targets/gpu/CMakeLists.txt                                   +1  -1
src/targets/gpu/compile_hip.cpp                                 +12  -0
src/targets/gpu/jit/pointwise.cpp                               +85 -18
src/targets/gpu/jit/scatternd.cpp                                +1  -2
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp +45 -19
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp   +8 -11
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp    +45  -1
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp   +17 -19
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp        +10  -1
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp  +31 -15
src/targets/ref/lowering.cpp                                     +1  -1
test/api/test_module_construct.cpp                               +5  -7
test/gpu/jit.cpp                                                +81  -0
test/py/test_module_construct.py                                +12 -10
test/py/test_numpy.py                                           +22  -0
src/targets/gpu/CMakeLists.txt

@@ -93,7 +93,7 @@ add_library(migraphx_device
 )
 add_library(compile_for_gpu INTERFACE)
 target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
-target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
+target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
 check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
 if(HAS_HIP_LAMBDA_HOST_DEVICE)
     message(STATUS "Enable -fhip-lambda-host-device")
src/targets/gpu/compile_hip.cpp

@@ -22,6 +22,7 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);

 #if MIGRAPHX_USE_HIPRTC

@@ -133,6 +134,7 @@ struct hiprtc_program
         std::vector<char> buffer(n);
         MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data()));
         assert(buffer.back() == 0);
+        // cppcheck-suppress returnDanglingLifetime
         return {buffer.begin(), buffer.end() - 1};
     }

@@ -246,6 +248,16 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
             MIGRAPHX_THROW("Missing hsaco");
     };

+    if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
+    {
+        for(const auto& src : srcs)
+        {
+            if(src.path.extension() != ".cpp")
+                continue;
+            std::cout << std::string(src.content.first, src.len()) << std::endl;
+        }
+    }
     if(enabled(MIGRAPHX_GPU_DUMP_ASM{}))
     {
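The new MIGRAPHX_GPU_DUMP_SRC flag follows the same MIGRAPHX_DECLARE_ENV_VAR / enabled() pattern as the existing MIGRAPHX_GPU_DUMP_ASM: when the variable is set, every .cpp source handed to the HIP compiler is echoed to stdout before compilation. A rough sketch of that gating pattern; env_flag_enabled below is an invented stand-in, not MIGraphX's actual enabled() implementation:

    #include <cstdlib>
    #include <string>

    // Invented stand-in for MIGraphX's enabled(): treat the variable as a
    // boolean switch that is on for any non-empty value other than "0".
    bool env_flag_enabled(const char* name)
    {
        const char* v = std::getenv(name);
        return v != nullptr and std::string{v} != "0";
    }

    // Usage mirroring the new code path:
    //   if(env_flag_enabled("MIGRAPHX_GPU_DUMP_SRC"))
    //       ... print each .cpp source before compiling ...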
src/targets/gpu/jit/pointwise.cpp

@@ -6,6 +6,7 @@
 #include <migraphx/cpp_generator.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
+#include <migraphx/permutation.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/eliminate_common_subexpression.hpp>

@@ -28,7 +29,8 @@ ${preamble}
 extern "C" {
 __global__ void kernel(${params})
 {
-    pointwise(${lambda}, ${args});
+    auto idx = make_index();
+    pointwise(idx, auto_preload<${preloads}>(idx), vectorize<${vec_size}, ${axis}>())(${lambda}, ${args});
 }
 }

@@ -41,40 +43,105 @@ struct pointwise_compiler : compiler<pointwise_compiler>
 {
     std::vector<std::string> names() const { return {"pointwise"}; }

-    static std::size_t oversubscribe(const std::vector<shape>& inputs)
+    static std::size_t oversubscribe_if(bool b)
     {
-        if(std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); }))
-            return 1;
-        else
+        if(b)
             return 256;
+        else
+            return 1;
     }

+    static std::size_t find_fast_axis(const std::vector<shape>& inputs)
+    {
+        auto permutation = find_permutation(inputs);
+        auto it          = std::max_element(permutation.begin(), permutation.end());
+        return it - permutation.begin();
+    }
+
-    static std::size_t vectorize_elements(const std::vector<shape>& inputs)
+    static std::vector<bool> preload(std::size_t axis, const std::vector<shape>& inputs)
     {
-        std::size_t n = inputs.front().elements();
+        const std::size_t max_lds_bytes = 4096;
+        std::vector<bool> result;
+        std::transform(inputs.begin(), inputs.end(), std::back_inserter(result),
+                       [&](const shape& input) { return input.strides()[axis] == 0; });
+        auto bytes = std::inner_product(inputs.begin(), inputs.end(), result.begin(),
+                                        std::size_t{0}, std::plus<>{},
+                                        [](const shape& s, bool b) -> std::size_t {
+                                            if(b)
+                                                return s.bytes();
+                                            return 0;
+                                        });
+        if(bytes < max_lds_bytes)
+            return result;
+        // TODO: Try to partially preload items
+        std::fill(result.begin(), result.end(), false);
+        return result;
+    }
+
+    static std::string preload_str(const std::vector<bool>& bs)
+    {
+        std::vector<std::string> bool_strs;
+        std::transform(bs.begin(), std::prev(bs.end()), std::back_inserter(bool_strs),
+                       [](bool b) {
+                           if(b)
+                               return "true";
+                           return "false";
+                       });
+        return "false, " + join_strings(bool_strs, ", ");
+    }
+
+    static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
+    {
+        // If all inputs is half then only use half2
         if(std::all_of(inputs.begin(), inputs.end(), [](const auto& s) {
-               return s.packed() or s.broadcasted();
+               return s.type() == shape::half_type;
            }))
-        {
-            if((n % 4) == 0)
-                return n / 4;
-            else if((n % 2) == 0)
-                return n / 2;
-        }
-        return n;
+            return {2};
+        return {4, 2};
     }

+    static auto vectorize_elements(std::size_t axis, const std::vector<shape>& inputs)
+    {
+        auto sizes = vector_sizes(inputs);
+        std::vector<std::size_t> max_vec_size;
+        std::transform(inputs.begin(), inputs.end(), std::back_inserter(max_vec_size),
+                       [&](const auto& input) -> std::size_t {
+                           auto stride = input.strides()[axis];
+                           auto len    = input.lens()[axis];
+                           if(stride != 0 and stride != 1)
+                               return 1;
+                           auto it = std::find_if(sizes.begin(), sizes.end(),
+                                                  [&](auto i) { return (len % i) == 0; });
+                           if(it != sizes.end())
+                               return *it;
+                           return 1;
+                       });
+        return *std::min_element(max_vec_size.begin(), max_vec_size.end());
+    }
+
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         hip_compile_options options;
-        options.set_launch_params(v, compute_global_for(ctx, vectorize_elements(inputs), oversubscribe(inputs)));
         options.inputs         = inputs;
         options.output         = inputs.back();
         options.virtual_inputs = reduce_dims(inputs);
         options.params         = "-Wno-float-equal";
-        auto src = interpolate_string(pointwise_kernel,
+        auto axis          = find_fast_axis(options.virtual_inputs);
+        auto vec_size      = vectorize_elements(axis, options.virtual_inputs);
+        auto preloads      = preload(axis, options.virtual_inputs);
+        auto is_preloading =
+            std::accumulate(preloads.begin(), preloads.end(), false, std::logical_or<>{});
+        options.set_launch_params(
+            v, compute_global_for(ctx, options.output.elements() / vec_size,
+                                  oversubscribe_if(not is_preloading)));
+        auto src = interpolate_string(pointwise_kernel,
                                       {{"params", enum_params(inputs.size(), "void * private_p")},
                                        {"args", enum_params(inputs.size(), "private_p")},
                                        {"lambda", v.at("lambda").to<std::string>()},
+                                       {"vec_size", std::to_string(vec_size)},
+                                       {"axis", std::to_string(axis)},
+                                       {"preloads", preload_str(preloads)},
                                        {"preamble", v.get("preamble", std::string{})}});
         return compile_hip_code_object(src, options);
     }
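The compiler now picks a single vector size that works for every input along the chosen fast axis: the size must divide the axis length, and the axis must be unit-stride or broadcast (stride 0). A self-contained sketch mirroring that rule on plain stride/length arrays; toy_shape and pick_vec_size are invented stand-ins for migraphx::shape and the member functions above, kept only to make the selection observable:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Simplified stand-in for migraphx::shape: lens/strides per axis.
    struct toy_shape
    {
        std::vector<std::size_t> lens;
        std::vector<std::size_t> strides;
    };

    // Mirrors vectorize_elements: per input, find the largest candidate size
    // that divides the axis length, then take the minimum over all inputs.
    std::size_t pick_vec_size(std::size_t axis, const std::vector<toy_shape>& inputs,
                              const std::vector<std::size_t>& sizes = {4, 2})
    {
        std::size_t result = sizes.front();
        for(const auto& input : inputs)
        {
            std::size_t best = 1;
            auto stride      = input.strides[axis];
            auto len         = input.lens[axis];
            if(stride == 0 or stride == 1)
            {
                auto it = std::find_if(sizes.begin(), sizes.end(),
                                       [&](auto i) { return (len % i) == 0; });
                if(it != sizes.end())
                    best = *it;
            }
            result = std::min(result, best);
        }
        return result;
    }

    int main()
    {
        // Two packed {5, 2} tensors: the fast axis has length 2, so the
        // largest usable vector size from {4, 2} is 2.
        std::vector<toy_shape> inputs = {{{5, 2}, {2, 1}}, {{5, 2}, {2, 1}}};
        std::cout << pick_vec_size(1, inputs) << "\n"; // prints 2
    }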
src/targets/gpu/jit/scatternd.cpp

@@ -52,9 +52,8 @@ struct scatternd_compiler : compiler<scatternd_compiler>
     {
         hip_compile_options options;
         options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));
-        auto out_s             = inputs.back();
         options.inputs         = inputs;
-        options.output         = out_s;
+        options.output         = inputs.back();
         options.kernel_name    = "scatternd_kernel";
         options.virtual_inputs = inputs;
         auto reduction = "assign_" + v.get("reduction", std::string{"none"});
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp

@@ -3,6 +3,14 @@
 #include <migraphx/kernels/integral_constant.hpp>

+// NOLINTNEXTLINE
+#define MIGRAPHX_RETURNS(...) \
+    ->decltype(__VA_ARGS__) { return __VA_ARGS__; }
+
+// NOLINTNEXTLINE
+#define MIGRAPHX_LIFT(...) \
+    [](auto&&... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(xs)>(xs)...))
+
 namespace migraphx {

 struct swallow

@@ -161,6 +169,18 @@ constexpr auto pack(Ts... xs)
     return [=](auto f) { return f(xs...); };
 }

+template <class G, class F>
+constexpr auto join(G g, F f)
+{
+    return f([=](auto... xs) { return g(xs...); });
+}
+
+template <class G, class F, class... Fs>
+constexpr auto join(G g, F f, Fs... fs)
+{
+    return f([=](auto... xs) {
+        return join([=](auto... ys) { return g(xs..., ys...); }, fs...);
+    });
+}
+
 template <class Compare, class P1, class P2>
 constexpr auto pack_compare(Compare compare, P1 p1, P2 p2)
 {

@@ -191,39 +211,45 @@ constexpr auto arg(IntegralConstant ic)
     return arg_c<ic>();
 }

-inline constexpr auto rotate_last()
+template <class F>
+constexpr auto make_transform(F f)
 {
-    return [](auto... xs) {
-        return [=](auto&& f) {
-            return sequence_c<sizeof...(xs)>([&](auto... is) {
-                constexpr auto size = sizeof...(is);
-                return f(arg_c<(is + size - 1) % size>()(xs...)...);
-            });
-        };
-    };
+    return [=](auto... xs) {
+        return [=](auto g) { return f(g, xs...); };
+    };
 }

+// An arg transformation takes the arguments and then a function to take the new arguments:
+// transform(xs...)([](auto... ys) { ... })
+// The transform_args function takes a list of transformations and continually applies them
 template <class F>
 constexpr auto transform_args(F f)
 {
-    return [=](auto... xs) {
-        return [=](auto g) {
-            return f(xs...)([&](auto... ys) { return g(ys...); });
-        };
-    };
+    return f;
 }

 template <class F, class... Fs>
 constexpr auto transform_args(F f, Fs... fs)
 {
-    return [=](auto... xs) {
-        return transform_args(f)(xs...)(transform_args(fs...));
-    };
+    return make_transform([=](auto g, auto... xs) {
+        return f(xs...)([=](auto... ys) { return transform_args(fs...)(ys...)(g); });
+    });
 }

-// NOLINTNEXTLINE
-#define MIGRAPHX_RETURNS(...) \
-    ->decltype(__VA_ARGS__) { return __VA_ARGS__; }
+// identity transform
+inline constexpr auto transform_args()
+{
+    return make_transform([](auto f, auto... xs) { return f(xs...); });
+}

-// NOLINTNEXTLINE
-#define MIGRAPHX_LIFT(...) \
-    [](auto&&... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(xs)>(xs)...))
+// Rotate the first argument to the last argument
+inline constexpr auto rotate_last()
+{
+    return make_transform([](auto f, auto... xs) {
+        return sequence_c<sizeof...(xs)>([&](auto... is) {
+            constexpr auto size = sizeof...(is);
+            return f(arg_c<(is + size - 1) % size>()(xs...)...);
+        });
+    });
+}

 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_FUNCTIONAL_HPP
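make_transform and transform_args are plain continuation-passing composition, so they can be exercised host-side with no device code. A minimal sketch (the doubling and negating transforms are invented for illustration) showing that transform_args applies its transforms left to right and only then invokes the final continuation:

    #include <iostream>

    // Same shape as the kernel helpers: a transform wraps a function f that
    // takes the continuation g first, then the arguments.
    template <class F>
    constexpr auto make_transform(F f)
    {
        return [=](auto... xs) { return [=](auto g) { return f(g, xs...); }; };
    }

    template <class F>
    constexpr auto transform_args(F f)
    {
        return f;
    }

    template <class F, class... Fs>
    constexpr auto transform_args(F f, Fs... fs)
    {
        return make_transform([=](auto g, auto... xs) {
            return f(xs...)([=](auto... ys) { return transform_args(fs...)(ys...)(g); });
        });
    }

    int main()
    {
        // Two toy argument transformations: double everything, then negate.
        auto doubler = make_transform([](auto g, auto... xs) { return g((xs * 2)...); });
        auto negate  = make_transform([](auto g, auto... xs) { return g((-xs)...); });

        auto t = transform_args(doubler, negate);
        // (1, 2, 3) -> (2, 4, 6) -> (-2, -4, -6), summed by the continuation.
        std::cout << t(1, 2, 3)([](auto... ys) { return (ys + ...); }) << "\n"; // -12
    }

This is the contract the pointwise kernel relies on when it chains make_tensors(), rotate_last(), auto_preload<...>(idx), and vectorize<N, Axis>().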
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp

@@ -38,20 +38,17 @@ constexpr implicit_conversion_op<T> implicit_conversion(T x)
 template <class F, class T, class... Ts>
 __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 {
-    preload<typename T::type>(idx, xs...)([&](auto... ps) {
-        idx.global_stride(out.get_shape().elements(),
-                          [&](auto i) { out[i] = implicit_conversion(f(ps[i]...)); });
-    });
+    idx.global_stride(out.get_shape().elements(),
+                      [&](auto i) { out[i] = implicit_conversion(f(xs[i]...)); });
 }

-template <class F, class... Ts>
-__device__ void pointwise(F f, Ts*... ps)
+template <class... Transforms>
+__device__ auto pointwise(index idx, Transforms... transforms)
 {
-    auto t = transform_args(make_tensors(), rotate_last(), auto_vectorize());
-    t(ps...)([&](auto... xs) {
-        auto idx = make_index();
-        pointwise_tensor(idx, f, xs...);
-    });
+    return [=](auto f, auto*... ps) {
+        auto t = transform_args(make_tensors(), rotate_last(), transforms...);
+        t(ps...)([&](auto... xs) { pointwise_tensor(idx, f, xs...); });
+    };
 }

 } // namespace migraphx
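Note the currying: pointwise(idx, transforms...) now returns a callable that takes the lambda and the raw kernel pointers, which is exactly how the updated kernel template invokes it. A small host-side analogue of that call shape (make_pointwise and the arrays below are invented; the real version threads the pointers through the transform chain first):

    #include <iostream>

    // Configuration first, then the operation plus its operands.
    auto make_pointwise(int idx)
    {
        return [=](auto f, auto*... ps) {
            // Apply f at position idx of each array (illustrative only).
            return f(ps[idx]...);
        };
    }

    int main()
    {
        int a[] = {1, 2, 3};
        int b[] = {10, 20, 30};
        auto pw = make_pointwise(1); // fix the "index" first ...
        // ... then supply the lambda and the pointers, like kernel(${params}).
        std::cout << pw([](int x, int y) { return x + y; }, a, b) << "\n"; // 22
    }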
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp

@@ -3,6 +3,8 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/functional.hpp>
 #include <migraphx/kernels/tensor_view.hpp>
+#include <migraphx/kernels/vec.hpp>
+
 namespace migraphx {

@@ -73,7 +75,7 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
         {
             if constexpr(decltype(tensor_vec_size(x)){} == 0)
             {
-                auto v = vectorize(x);
+                auto v = auto_vectorize(x);
                 auto b = as_vec(tensor_vec_size(v), buffer + offset);
                 idx.local_stride(v.get_shape().element_space(),
                                  [&](auto i) { b[i] = v.data()[i]; });

@@ -126,5 +128,47 @@ __device__ auto preload(index idx, Ts... xs)
     };
 }

+inline __device__ auto auto_preload(index idx)
+{
+    return make_transform([=](auto f, auto out, auto... xs) {
+        preload<typename decltype(out)::type>(idx, xs...)([&](auto... ys) { f(out, ys...); });
+    });
+}
+
+template <bool B, class T>
+__device__ auto preload_copy(index idx, T x)
+{
+    return [=](auto f) {
+        if constexpr(B)
+        {
+            using type          = typename T::type;
+            constexpr auto size = get_shape_c<T>{}.element_space();
+            __shared__ type buffer[size];
+            // TODO: Always vecotrize when size > 4, and then use a second loop for remainder
+            constexpr auto n = find_vectorize_size([&](auto i) { return (size % i) == 0; });
+            auto input = as_vec<n>(remove_bool(x.data()));
+            auto b     = as_vec<n>(remove_bool(buffer));
+            idx.local_stride(size / n, [&](auto i) { b[i] = input[i]; });
+            return f(x.with(buffer));
+        }
+        else
+        {
+            return f(x);
+        }
+    };
+}
+
+template <bool... Bs>
+__device__ auto auto_preload(index idx)
+{
+    return make_transform([=](auto f, auto... xs) {
+        auto invoke = [=](auto... ys) {
+            __syncthreads();
+            f(ys...);
+        };
+        join(invoke, preload_copy<Bs>(idx, xs)...);
+    });
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_PRELOAD_HPP
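preload_copy<B> stages an input into a __shared__ buffer with a cooperative, vectorized copy, and join() sequences all the copies so that a single __syncthreads() runs before the user lambda. A stripped-down HIP kernel sketching the same staging pattern (illustrative only, with an assumed 64-element broadcast input; it compiles standalone with hipcc but is not the MIGraphX code):

    #include <hip/hip_runtime.h>

    // Each block stages the small, reused input into shared memory once,
    // synchronizes, then every thread reads from the fast copy.
    __global__ void preload_style_kernel(const float* broadcast_in, float* out, int n)
    {
        __shared__ float buffer[64]; // element_space of the preloaded tensor
        // local_stride-style copy: threads cooperatively fill the buffer.
        for(int i = threadIdx.x; i < 64; i += blockDim.x)
            buffer[i] = broadcast_in[i];
        __syncthreads(); // matches the barrier in auto_preload's invoke
        for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
            i += gridDim.x * blockDim.x)
            out[i] = out[i] + buffer[i % 64];
    }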
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp

@@ -118,15 +118,13 @@ constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
 }

 template <class T, class U, class V, class W, class Settings>
-__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
+__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, Settings s)
 {
     auto index = make_index();
     const auto x    = x_t.begin();
     const auto rois = rois_t.begin();
     const auto ind  = ind_t.begin();
-    auto out_ptr    = y_t.begin();

     // input shape
     auto x_lens      = x_t.get_shape().lens;
     auto channel_num = x_lens[1];

@@ -176,25 +174,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
             const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
             if constexpr(s.is_avg_pooling)
             {
-                out_ptr[i] = calc_pooling(offset_x, roi_starts, bin_size, {ph, pw},
-                                          bin_grid_size, in_dims, s.roi_offset, avg_pool{});
+                y_t[i] = calc_pooling(offset_x, roi_starts, bin_size, {ph, pw},
+                                      bin_grid_size, in_dims, s.roi_offset, avg_pool{});
             }
             else
             {
-                out_ptr[i] = calc_pooling(offset_x, roi_starts, bin_size, {ph, pw},
-                                          bin_grid_size, in_dims, s.roi_offset, max_pool{});
+                y_t[i] = calc_pooling(offset_x, roi_starts, bin_size, {ph, pw},
+                                      bin_grid_size, in_dims, s.roi_offset, max_pool{});
             }
         }
     }
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp

@@ -61,10 +61,19 @@ constexpr auto common_vec_size()
     })(vec_size<Ts>()...);
 }

+// Bools can not be used as a vector type so convert it to uint8
+template <class T>
+__device__ __host__ T* remove_bool(T* x)
+{
+    return x;
+}
+
+inline __device__ __host__ uint8_t* remove_bool(bool* x) { return reinterpret_cast<uint8_t*>(x); }
+
 template <index_int N, class T>
 __device__ __host__ auto as_vec(T* x)
 {
-    if constexpr(N == 0)
+    if constexpr(N < 2)
         return x;
     else
         return reinterpret_cast<vec<T, N>*>(x);
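remove_bool moves into vec.hpp because vec<bool, N> is not a valid vector element type, so bool storage is viewed as uint8_t before being reinterpreted into vectors; as_vec<N> also now treats N == 1 like N == 0 and returns the scalar pointer unchanged. A host-side sketch of just the remove_bool behavior:

    #include <cstdint>
    #include <iostream>

    // Mirrors the new helpers: pass most pointers through untouched, but view
    // bool storage as uint8_t so it can participate in vector types.
    template <class T>
    T* remove_bool(T* x) { return x; }

    inline std::uint8_t* remove_bool(bool* x)
    {
        return reinterpret_cast<std::uint8_t*>(x);
    }

    int main()
    {
        bool flags[4]   = {true, false, true, true};
        float values[4] = {1, 2, 3, 4};
        std::uint8_t* b = remove_bool(flags);  // reinterpreted
        float* f        = remove_bool(values); // unchanged
        std::cout << int(b[0]) << " " << f[0] << "\n"; // prints "1 1"
    }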
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp

@@ -50,19 +50,10 @@ constexpr auto shape_step(Shape s, Axis)
     });
 }

-// Bools can not be used as a vector type so convert it to uint8
-template <class T>
-__device__ __host__ T* remove_bool(T* x)
-{
-    return x;
-}
-
-inline __device__ __host__ uint8_t* remove_bool(bool* x) { return reinterpret_cast<uint8_t*>(x); }
-
 template <index_int N, class T, class Axis>
 __device__ __host__ auto as_vec(T x, Axis axis)
 {
-    if constexpr(N == 0)
+    if constexpr(N < 2)
         return x;
     else
         return make_tensor_view(as_vec<N>(remove_bool(x.data())),

@@ -72,7 +63,7 @@ __device__ __host__ auto as_vec(T x, Axis axis)
 template <index_int N, class T, class Axis>
 constexpr auto tensor_step(T x, Axis axis)
 {
-    if constexpr(N == 0)
+    if constexpr(N < 2)
     {
         return x;
     }

@@ -157,11 +148,11 @@ constexpr auto find_vectorize_size(P pred)
     else if constexpr(decltype(pred(_c<2>)){})
         return _c<2>;
     else
-        return _c<0>;
+        return _c<1>;
 }

 template <class T>
-__host__ __device__ auto vectorize(T x)
+__host__ __device__ auto auto_vectorize(T x)
 {
     if constexpr(tensor_vec_size<T>() == 0)
     {

@@ -194,7 +185,7 @@ inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
         {
             MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1);
             MIGRAPHX_ASSERT(s.lens[axis] > 0);
-            MIGRAPHX_ASSERT(n == 0 or s.lens[axis] % n == 0);
+            MIGRAPHX_ASSERT(n == 1 or s.lens[axis] % n == 0);
             if constexpr(s.strides[axis] == 0)
                 return tensor_step<n>(x, axis);
             else

@@ -215,7 +206,32 @@ inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
 inline __device__ __host__ auto auto_vectorize()
 {
-    return [](auto... xs) {
-        return [=](auto f) { auto_vectorize_impl(f, xs...); };
-    };
+    return make_transform([](auto f, auto... xs) { auto_vectorize_impl(f, xs...); });
 }

+template <index_int N, index_int Axis, class T>
+__device__ __host__ auto vectorize_tensor(T x)
+{
+    constexpr auto shape = get_shape_c<T>{};
+    if constexpr(shape.strides[Axis] == 0)
+        return tensor_step<N>(x, _c<Axis>);
+    else
+        return as_vec<N>(x, _c<Axis>);
+}
+
+template <index_int N, index_int Axis>
+__device__ __host__ auto vectorize()
+{
+    return make_transform([](auto f, auto... xs) {
+        if constexpr(N < 2)
+        {
+            f(xs...);
+        }
+        else
+        {
+            f(vectorize_tensor<N, Axis>(xs)...);
+        }
+    });
+}
+
 } // namespace migraphx
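vectorize<N, Axis>() bakes the vector size and axis into the kernel at JIT time and degenerates to the identity transform when N < 2. A host-side sketch of the same if constexpr dispatch; vectorize_tensor_stub just scales its argument so the two branches are observable (an invented stand-in for the real tensor rewrite):

    #include <iostream>

    // Stand-in for vectorize_tensor<N, Axis>: scaling makes the branch visible.
    template <int N, class T>
    T vectorize_tensor_stub(T x)
    {
        return x * N;
    }

    // Same shape as the device-side vectorize<N, Axis>(): identity when N < 2,
    // otherwise rewrite every argument before calling f.
    template <int N>
    auto vectorize_stub()
    {
        return [](auto f, auto... xs) {
            if constexpr(N < 2)
                return f(xs...);
            else
                return f(vectorize_tensor_stub<N>(xs)...);
        };
    }

    int main()
    {
        auto sum = [](auto... xs) { return (xs + ...); };
        std::cout << vectorize_stub<1>()(sum, 1, 2, 3) << "\n"; // 6 (identity)
        std::cout << vectorize_stub<4>()(sum, 1, 2, 3) << "\n"; // 24 (rewritten)
    }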
src/targets/ref/lowering.cpp

@@ -505,7 +505,7 @@ struct ref_unary : auto_register_op<ref_unary<Op>>
     shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(1);
-        auto s = inputs.at(0);
+        const auto& s = inputs.at(0);
         return {s.type(), s.lens()};
     }
test/api/test_module_construct.cpp

@@ -3,23 +3,21 @@
 #include <migraphx/migraphx.hpp>
 #include "test.hpp"

-TEST_CASE(add_op)
+TEST_CASE(add_literals)
 {
     migraphx::program p;
     migraphx::module m = p.get_main_module();
     migraphx::shape param_shape{migraphx_shape_float_type, {3, 3}};
-    auto x = m.add_parameter("x", param_shape);
-    auto y = m.add_parameter("y", param_shape);
+    std::vector<float> x_values(9, 1);
+    auto x = m.add_literal(param_shape, x_values.data());
+    std::vector<float> y_values(9, -1);
+    auto y = m.add_literal(param_shape, y_values.data());
     auto add_op = migraphx::operation("add");
     auto r      = m.add_instruction(add_op, {x, y});
     m.add_return({r});
     // run on ref target
     p.compile(migraphx::target("ref"));
     migraphx::program_parameters pp;
-    std::vector<float> x_data(9, 1);
-    std::vector<float> y_data(9, -1);
-    pp.add("x", migraphx::argument(param_shape, x_data.data()));
-    pp.add("y", migraphx::argument(param_shape, y_data.data()));
     auto outputs = p.eval(pp);
     auto output  = outputs[0];
     std::vector<float> expected(9, 0);
test/gpu/jit.cpp

@@ -3,6 +3,7 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/generate.hpp>
 #include <migraphx/program.hpp>
+#include <migraphx/par_for.hpp>
 #include <migraphx/gpu/kernel.hpp>
 #include <migraphx/gpu/target.hpp>
 #include <migraphx/gpu/hip.hpp>

@@ -109,6 +110,24 @@ int main() {}
 )__migraphx__";

+// NOLINTNEXTLINE
+const std::string math_template = R"__migraphx__(
+#include <migraphx/kernels/pointwise.hpp>
+#include <migraphx/kernels/math.hpp>
+extern "C" {
+__global__ void kernel(${type}* p)
+{
+    auto x = *p;
+    *p = migraphx::implicit_conversion(migraphx::${invoke});
+}
+}
+
+int main() {}
+)__migraphx__";
+
 migraphx::src_file make_src_file(const std::string& name, const std::string& content)
 {
     return {name, std::make_pair(content.data(), content.data() + content.size())};

@@ -248,4 +267,66 @@ TEST_CASE(compile_pointwise)
     EXPECT(result == output_literal.get_argument());
 }

+TEST_CASE(compile_math)
+{
+    std::vector<std::string> math_invoke = {
+        // clang-format off
+        "abs(x)", "acos(x)", "acosh(x)", "asin(x)", "asinh(x)", "atan(x)", "atanh(x)",
+        "ceil(x)", "cos(x)", "cosh(x)", "erf(x)", "exp(x)", "floor(x)", "isnan(x)",
+        "log(x)", "max(x, x)", "min(x, x)", "pow(x, 0)", "pow(x, x)", "round(x)",
+        "rsqrt(x)", "sin(x)", "sinh(x)", "sqrt(x)", "tan(x)", "tanh(x)", "where(true, x, x)",
+        // clang-format on
+    };
+    std::vector<std::string> data_types;
+    auto vec_sizes = {2, 4, 6};
+    for(auto&& t : migraphx::shape::types())
+    {
+        if(contains({migraphx::shape::bool_type, migraphx::shape::tuple_type}, t))
+            continue;
+        auto name = migraphx::shape::cpp_type(t);
+        if(t == migraphx::shape::half_type)
+            name.insert(0, "migraphx::");
+        data_types.push_back(name);
+        migraphx::transform(vec_sizes, std::back_inserter(data_types), [&](auto i) {
+            return "migraphx::vec<" + name + ", " + std::to_string(i) + ">";
+        });
+    }
+    migraphx::shape input{migraphx::shape::float_type, {5, 2}};
+    migraphx::gpu::hip_compile_options options;
+    options.global = 1024;
+    options.local  = 1024;
+    options.inputs = {input};
+    options.output = input;
+    migraphx::par_for(math_invoke.size() * data_types.size(), 1, [&](auto i) {
+        const auto& t      = data_types[i % data_types.size()];
+        const auto& invoke = math_invoke[i / data_types.size()];
+        auto src = migraphx::interpolate_string(math_template, {{"type", t}, {"invoke", invoke}});
+        auto co  = migraphx::gpu::compile_hip_code_object(src, options);
+        (void)co;
+    });
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
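The new compile_math test walks the full cross product of math expressions and data types, using par_for to compile them in parallel and ${...} substitution to generate each kernel source. A minimal sketch of what that ${key} substitution has to do; interpolate below is an invented stand-in for migraphx::interpolate_string, reproducing only the observable behavior used here:

    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <string>

    // Replace each ${key} occurrence with its value.
    std::string interpolate(std::string s,
                            const std::map<std::string, std::string>& vars)
    {
        for(const auto& [key, value] : vars)
        {
            const std::string pattern = "${" + key + "}";
            for(std::size_t pos = s.find(pattern); pos != std::string::npos;
                pos = s.find(pattern, pos + value.size()))
                s.replace(pos, pattern.size(), value);
        }
        return s;
    }

    int main()
    {
        std::cout << interpolate("__global__ void kernel(${type}* p) { ${invoke}; }",
                                 {{"type", "float"}, {"invoke", "sin(x)"}})
                  << "\n";
    }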
test/py/test_module_construct.py

-import migraphx
+import migraphx, array, sys
+
+
+def create_buffer(t, data, shape):
+    a = array.array(t, data)
+    m = memoryview(a.tobytes())
+    return m.cast(t, shape)


 def test_add_op():
     p = migraphx.program()
     mm = p.get_main_module()
-    param_shape = migraphx.shape(lens=[3, 3], type="float")
-    x = mm.add_parameter("x", param_shape)
-    y = mm.add_parameter("y", param_shape)
+    x = mm.add_literal(create_buffer('f', [1.0] * 9, (3, 3)))
+    y = mm.add_literal(create_buffer('f', [2.0] * 9, (3, 3)))
     add_op = mm.add_instruction(migraphx.op("add"), [x, y])
     mm.add_return([add_op])
     p.compile(migraphx.get_target("ref"))
     params = {}
-    params["x"] = migraphx.generate_argument(param_shape)
-    params["y"] = migraphx.generate_argument(param_shape)
     output = p.run(params)[-1].tolist()
-    assert output == [
-        a + b for a, b in zip(params["x"].tolist(), params["y"].tolist())
-    ]
+    assert output == list([3.0] * 9)


 def test_if_then_else():

@@ -60,5 +61,6 @@ def test_if_then_else():
 if __name__ == "__main__":
-    test_add_op()
+    if sys.version_info >= (3, 0):
+        test_add_op()
     test_if_then_else()
test/py/test_numpy.py  (new file, mode 100644)

+import migraphx, sys
+try:
+    import numpy as np
+except:
+    sys.exit()
+
+
+def test_add_op():
+    p = migraphx.program()
+    mm = p.get_main_module()
+    x = mm.add_literal(np.ones((3, 3), dtype='float32'))
+    y = mm.add_literal(2 * np.ones((3, 3), dtype='float32'))
+    add_op = mm.add_instruction(migraphx.op("add"), [x, y])
+    mm.add_return([add_op])
+    p.compile(migraphx.get_target("ref"))
+    params = {}
+    output = p.run(params)[-1].tolist()
+    assert output == list(3 * np.ones((9), dtype='float32'))
+
+
+if __name__ == "__main__":
+    test_add_op()