gaoqiong / MIGraphX

Unverified commit 6bc31506, authored Aug 26, 2019 by Paul Fultz II, committed by GitHub on Aug 26, 2019

Merge branch 'develop' into multiply-add

Parents: 3043afe5, 7534546a

Showing 16 changed files with 204 additions and 158 deletions (+204, -158)

src/driver/argument_parser.hpp (+47, -4)
src/driver/main.cpp (+24, -1)
src/driver/perf.cpp (+17, -0)
src/driver/perf.hpp (+1, -0)
src/generate.cpp (+11, -0)
src/include/migraphx/generate.hpp (+10, -0)
src/include/migraphx/requires.hpp (+4, -3)
src/targets/gpu/CMakeLists.txt (+1, -1)
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp (+5, -16)
src/targets/gpu/device/sigmoid.cpp (+18, -0)
src/targets/gpu/include/migraphx/gpu/device/sigmoid.hpp (+20, -0)
src/targets/gpu/include/migraphx/gpu/sigmoid.hpp (+3, -21)
src/targets/gpu/lowering.cpp (+1, -1)
src/targets/gpu/quant_gemm.cpp (+30, -75)
src/targets/gpu/sigmoid.cpp (+0, -36)
test/gpu/miopen.cpp (+12, -0)

src/driver/argument_parser.hpp

@@ -28,10 +28,32 @@ inline namespace MIGRAPHX_INLINE_NS {
 #define MIGRAPHX_DRIVER_STATIC static
 #endif
 
+template <class T>
+using bare = std::remove_cv_t<std::remove_reference_t<T>>;
+
+namespace detail {
+template <class T>
+auto is_container(int, T&& x) -> decltype(x.insert(x.end(), *x.begin()), std::true_type{});
+template <class T>
+std::false_type is_container(float, T&&);
+} // namespace detail
+
+template <class T>
+struct is_container : decltype(detail::is_container(int(0), std::declval<T>()))
+{
+};
+
+template <class T>
+using is_multi_value =
+    std::integral_constant<bool,
+                           (is_container<T>{} and not std::is_convertible<T, std::string>{})>;
+
 template <class T>
 struct value_parser
 {
-    template <MIGRAPHX_REQUIRES(not std::is_enum<T>{})>
+    template <MIGRAPHX_REQUIRES(not std::is_enum<T>{} and not is_multi_value<T>{})>
     static T apply(const std::string& x)
     {
         T result;
@@ -43,7 +65,7 @@ struct value_parser
         return result;
     }
 
-    template <MIGRAPHX_REQUIRES(std::is_enum<T>{})>
+    template <MIGRAPHX_REQUIRES(std::is_enum<T>{} and not is_multi_value<T>{})>
     static T apply(const std::string& x)
     {
         std::ptrdiff_t i;
@@ -54,6 +76,15 @@ struct value_parser
             throw std::runtime_error("Failed to parse: " + x);
         return static_cast<T>(i);
     }
+
+    template <MIGRAPHX_REQUIRES(is_multi_value<T>{} and not std::is_enum<T>{})>
+    static T apply(const std::string& x)
+    {
+        T result;
+        using value_type = typename T::value_type;
+        result.insert(result.end(), value_parser<value_type>::apply(x));
+        return result;
+    }
 };
 
 struct argument_parser
@@ -69,6 +100,18 @@ struct argument_parser
         unsigned nargs = 1;
     };
 
+    template <class T, MIGRAPHX_REQUIRES(is_multi_value<T>{})>
+    std::string as_string_value(const T& x)
+    {
+        return to_string_range(x);
+    }
+
+    template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})>
+    std::string as_string_value(const T& x)
+    {
+        return to_string(x);
+    }
+
     template <class T, class... Fs>
     void operator()(T& x, const std::vector<std::string>& flags, Fs... fs)
     {
@@ -81,7 +124,7 @@ struct argument_parser
         argument& arg     = arguments.back();
         arg.type          = migraphx::get_type_name<T>();
-        arg.default_value = to_string(x);
+        arg.default_value = as_string_value(x);
         migraphx::each_args([&](auto f) { f(x, arg); }, fs...);
     }
@@ -127,7 +170,7 @@ struct argument_parser
     MIGRAPHX_DRIVER_STATIC auto append()
     {
         return write_action([](auto&, auto& x, auto& params) {
-            using type = typename decltype(params)::value_type;
+            using type = typename bare<decltype(params)>::value_type;
             std::transform(params.begin(),
                            params.end(),
                            std::inserter(x, x.end()),
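
The is_container trait added above uses the two-overload detection idiom: the int overload is viable only when x.insert(x.end(), *x.begin()) is well-formed, otherwise resolution falls back to the float overload, and is_multi_value then excludes string-like types so that a repeated flag can accumulate into a container. A minimal standalone sketch of the same idiom (plain C++; the names below are illustrative, not part of MIGraphX):

    #include <string>
    #include <type_traits>
    #include <utility>
    #include <vector>

    namespace detail {
    // Preferred overload: viable only when insert(end(), *begin()) compiles for T.
    template <class T>
    auto is_container(int, T&& x) -> decltype(x.insert(x.end(), *x.begin()), std::true_type{});
    // Fallback overload: picked when the expression above is ill-formed.
    template <class T>
    std::false_type is_container(float, T&&);
    } // namespace detail

    template <class T>
    struct is_container : decltype(detail::is_container(int(0), std::declval<T>()))
    {
    };

    // A "multi value" option is a container that is not string-like.
    template <class T>
    using is_multi_value =
        std::integral_constant<bool, (is_container<T>{} && !std::is_convertible<T, std::string>{})>;

    static_assert(is_multi_value<std::vector<std::string>>{}, "repeatable flag target");
    static_assert(!is_multi_value<std::string>{}, "strings parse as a single value");
    static_assert(!is_multi_value<int>{}, "scalars parse as a single value");

    int main() {}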

src/driver/main.cpp

@@ -8,6 +8,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/pass_manager.hpp>
+#include <migraphx/generate.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/eliminate_identity.hpp>
 #include <migraphx/eliminate_pad.hpp>
@@ -80,11 +81,13 @@ struct compiler
 {
     loader l;
     bool gpu = true;
+    std::vector<std::string> fill1;
 
     void parse(argument_parser& ap)
     {
         l.parse(ap);
         ap(gpu, {"--gpu"}, ap.help("Compile on the gpu"), ap.set_value(true));
         ap(gpu, {"--cpu"}, ap.help("Compile on the cpu"), ap.set_value(false));
+        ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append());
     }
 
     program compile()
@@ -94,7 +97,14 @@ struct compiler
         return p;
     }
 
-    auto params(const program& p) { return create_param_map(p, gpu); }
+    auto params(const program& p)
+    {
+        program::parameter_map m;
+        for(auto&& s : fill1)
+            m[s] = fill_argument(p.get_parameter_shape(s), 1);
+        fill_param_map(m, p, gpu);
+        return m;
+    }
 };
 
 struct read : command<read>
@@ -109,6 +119,19 @@ struct read : command<read>
     }
 };
 
+struct params : command<params>
+{
+    loader l;
+    void parse(argument_parser& ap) { l.parse(ap); }
+
+    void run()
+    {
+        auto p = l.load();
+        for(auto&& param : p.get_parameter_shapes())
+            std::cout << param.first << ": " << param.second << std::endl;
+    }
+};
+
 struct verify : command<verify>
 {
     loader l;
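
Together with the new fill_param_map in the driver, the --fill1 flag lets chosen parameters be seeded with ones while anything still missing is generated afterwards. A small sketch of that seed-then-fill pattern (plain C++ with placeholder map and shape types, not the MIGraphX driver API):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for the driver's parameter map and shapes.
    using buffer        = std::vector<float>;
    using parameter_map = std::map<std::string, buffer>;

    // Pre-seed selected parameters with ones, then fill anything still missing
    // with a default pattern (zeros here, standing in for generated data).
    parameter_map make_params(const std::map<std::string, std::size_t>& shapes,
                              const std::vector<std::string>& fill1)
    {
        parameter_map m;
        for(const auto& name : fill1)
            m[name] = buffer(shapes.at(name), 1.0f); // like fill_argument(..., 1)
        for(const auto& p : shapes)
            if(m.find(p.first) == m.end())
                m[p.first] = buffer(p.second, 0.0f); // like generate_argument(...)
        return m;
    }

    int main()
    {
        std::map<std::string, std::size_t> shapes = {{"x", 4}, {"scale", 2}};
        auto m = make_params(shapes, {"scale"});
        for(const auto& p : m)
            std::cout << p.first << ": " << p.second.size() << " elements\n";
    }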

src/driver/perf.cpp

@@ -11,6 +11,23 @@ namespace migraphx {
 namespace driver {
 inline namespace MIGRAPHX_INLINE_NS {
 
+program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu)
+{
+    for(auto&& x : p.get_parameter_shapes())
+    {
+        argument& arg = m[x.first];
+        if(arg.empty())
+            arg = generate_argument(x.second);
+#ifdef HAVE_GPU
+        if(gpu)
+            arg = gpu::to_gpu(arg);
+#else
+        (void)gpu;
+#endif
+    }
+    return m;
+}
+
 program::parameter_map create_param_map(const program& p, bool gpu)
 {
     program::parameter_map m;

src/driver/perf.hpp

@@ -7,6 +7,7 @@ namespace migraphx {
 namespace driver {
 inline namespace MIGRAPHX_INLINE_NS {
 
+program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu);
 program::parameter_map create_param_map(const program& p, bool gpu = true);
 void compile_program(program& p, bool gpu = true);

src/generate.cpp

@@ -3,6 +3,17 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
+argument fill_argument(shape s, unsigned long value)
+{
+    argument result;
+    s.visit_type([&](auto as) {
+        using type = typename decltype(as)::type;
+        auto v     = fill_tensor_data<type>(s, value);
+        result     = {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
+    });
+    return result;
+}
+
 argument generate_argument(shape s, unsigned long seed)
 {
     argument result;
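
fill_argument above hands the shape a lambda that owns the filled vector by value: the capture keeps the storage alive for as long as the argument does, and mutable is what lets the closure call the vector's non-const data() and hand back a writable char*. A minimal sketch of that ownership trick (the owned_buffer type is illustrative, not the MIGraphX argument class):

    #include <cstring>
    #include <functional>
    #include <iostream>
    #include <vector>

    // A buffer whose storage is owned by the callable itself, similar in spirit
    // to constructing an argument from {shape, lambda} above.
    struct owned_buffer
    {
        std::function<char*()> get;
    };

    template <class T>
    owned_buffer make_filled(std::size_t n, T value)
    {
        std::vector<T> v(n, value);
        // The capture copies v into the closure; `mutable` allows calling the
        // non-const data() and returning a writable char* view of that storage.
        return {[v]() mutable { return reinterpret_cast<char*>(v.data()); }};
    }

    int main()
    {
        auto buf = make_filled<float>(4, 1.0f);
        float first;
        std::memcpy(&first, buf.get(), sizeof(float));
        std::cout << first << "\n"; // prints 1
    }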

src/include/migraphx/generate.hpp

@@ -87,6 +87,16 @@ std::vector<T> generate_tensor_data(const migraphx::shape& s, unsigned long seed
     return result;
 }
 
+template <class T>
+std::vector<T> fill_tensor_data(const migraphx::shape& s, unsigned long value = 0)
+{
+    std::vector<T> result(s.elements());
+    std::generate(result.begin(), result.end(), [=] { return value; });
+    return result;
+}
+
+argument fill_argument(shape s, unsigned long value = 0);
+
 argument generate_argument(shape s, unsigned long seed = 0);
 literal generate_literal(shape s, unsigned long seed = 0);

src/include/migraphx/requires.hpp

@@ -23,9 +23,10 @@ using bool_c = std::integral_constant<bool, B>;
 #ifdef CPPCHECK
 #define MIGRAPHX_REQUIRES(...) class = void
 #else
 #define MIGRAPHX_REQUIRES(...)                                                            \
-    bool MIGRAPHX_REQUIRES_VAR() = true,                                                  \
-    typename std::enable_if<(MIGRAPHX_REQUIRES_VAR() && (migraphx::and_<__VA_ARGS__>{})), \
+    long MIGRAPHX_REQUIRES_VAR() = __LINE__,                                              \
+    typename std::enable_if<(MIGRAPHX_REQUIRES_VAR() == __LINE__ &&                       \
+                             (migraphx::and_<__VA_ARGS__>{})),                            \
                             int>::type = 0
 #endif
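
The macro now defaults its hidden non-type parameter to __LINE__ and checks that value in the enable_if condition, so every expansion of MIGRAPHX_REQUIRES yields a structurally distinct template head rather than one that differs only in the requirement expression. A small sketch of a line-tagged requires macro in the same spirit (this is not the exact MIGraphX macro; MIGRAPHX_REQUIRES_VAR() is not shown in this diff):

    #include <iostream>
    #include <type_traits>

    // Line-tagged requires macro: the defaulted non-type parameter carries __LINE__,
    // so two uses on different lines never expand to the same template head.
    #define MY_REQUIRES(...)                                                              \
        long RequiresLine = __LINE__,                                                     \
        typename std::enable_if<(RequiresLine == __LINE__ && (__VA_ARGS__)), int>::type = 0

    template <class T, MY_REQUIRES(std::is_integral<T>{})>
    const char* describe(T)
    {
        return "integral";
    }

    template <class T, MY_REQUIRES(std::is_floating_point<T>{})>
    const char* describe(T)
    {
        return "floating point";
    }

    int main()
    {
        std::cout << describe(1) << "\n";   // integral
        std::cout << describe(2.5) << "\n"; // floating point
    }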

src/targets/gpu/CMakeLists.txt

@@ -34,6 +34,7 @@ add_library(migraphx_device
     device/contiguous.cpp
     device/logsoftmax.cpp
     device/softmax.cpp
+    device/sigmoid.cpp
     device/convert.cpp
     device/mul.cpp
     device/concat.cpp
@@ -78,7 +79,6 @@ add_library(migraphx_gpu
     batchnorm.cpp
     write_literals.cpp
     rocblas.cpp
-    sigmoid.cpp
     abs.cpp
     elu.cpp
     pad.cpp

src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp

@@ -245,8 +245,7 @@ void reduce_standard_impl(hipStream_t stream,
                           T init,
                           Input read_input,
                           Output read_output,
-                          std::size_t relements,
-                          std::size_t stride)
+                          std::size_t relements)
 {
     hip_visit_all(result, arg)([&](auto output, auto input) {
         auto nelements = result.get_shape().elements();
@@ -255,7 +254,7 @@ void reduce_standard_impl(hipStream_t stream,
         const std::size_t block_size = compute_block_size(relements, max_block_size);
         gs_launch(stream, nelements * block_size, block_size)([=](auto i, auto idx) __device__ {
             const auto out_idx  = i / block_size;
-            const auto base_idx = out_idx * stride;
+            const auto base_idx = out_idx * relements;
             auto r = block_reduce<max_block_size>(idx, op, init, relements, [&](auto j) __device__ {
                 return read_input(input.data()[base_idx + j]);
             });
@@ -276,25 +275,15 @@ void reduce(hipStream_t stream,
 {
     auto&& output_shape = result.get_shape();
     auto&& input_shape  = arg.get_shape();
+    assert(output_shape.lens().size() == input_shape.lens().size());
     if(input_shape.standard() and output_shape.standard() and
        output_shape.lens().back() != input_shape.lens().back() and
        std::equal(output_shape.lens().begin(),
                   std::prev(output_shape.lens().end()),
                   input_shape.lens().begin()))
     {
-        std::size_t stride = std::accumulate(input_shape.strides().begin(),
-                                             input_shape.strides().end(),
-                                             1,
-                                             std::multiplies<size_t>());
-        reduce_standard_impl(stream,
-                             result,
-                             arg,
-                             op,
-                             init,
-                             read_input,
-                             read_output,
-                             input_shape.lens().back(),
-                             stride);
+        reduce_standard_impl(
+            stream, result, arg, op, init, read_input, read_output, input_shape.lens().back());
     }
     else
     {
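
The fast path above only fires when both shapes are standard (packed, row-major) and every axis except the last matches, in which case the values reduced into output element out_idx are the contiguous run starting at out_idx * relements, so no separate stride bookkeeping is needed. A CPU-side sketch of that indexing (plain C++, summing instead of a generic op):

    #include <cassert>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Reduce the last axis of a contiguous row-major tensor: output element i sums
    // the slice [i * relements, (i + 1) * relements), mirroring base_idx = out_idx * relements.
    std::vector<float> reduce_last_axis_sum(const std::vector<float>& data, std::size_t relements)
    {
        assert(relements != 0 && data.size() % relements == 0);
        std::vector<float> out(data.size() / relements);
        for(std::size_t i = 0; i < out.size(); ++i)
        {
            auto base = data.begin() + static_cast<std::ptrdiff_t>(i * relements);
            out[i]    = std::accumulate(base, base + static_cast<std::ptrdiff_t>(relements), 0.0f);
        }
        return out;
    }

    int main()
    {
        // A {2, 3} tensor reduced over axis 1 -> {2} outputs.
        std::vector<float> t = {1, 2, 3, 4, 5, 6};
        for(float v : reduce_last_axis_sum(t, 3))
            std::cout << v << " "; // prints "6 15"
        std::cout << "\n";
    }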

src/targets/gpu/device/sigmoid.cpp (new file, mode 100644)

#include <migraphx/gpu/device/sigmoid.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return 1.f / (1.f + ::exp(to_hip_type(-x))); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
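
The kernel applies the logistic function elementwise through nary. A host-side sketch of the same mapping, handy as a reference when checking GPU results (plain C++, not the device code):

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Host-side version of the elementwise mapping the kernel above runs on the GPU.
    float sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

    int main()
    {
        std::vector<float> in = {-2.0f, 0.0f, 2.0f};
        std::vector<float> out(in.size());
        std::transform(in.begin(), in.end(), out.begin(), sigmoid);
        for(float v : out)
            std::cout << v << " "; // ~0.119 0.5 ~0.881
        std::cout << "\n";
    }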

src/targets/gpu/include/migraphx/gpu/device/sigmoid.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SIGMOID_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SIGMOID_HPP

#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void sigmoid(hipStream_t stream, const argument& result, const argument& arg);

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif

src/targets/gpu/include/migraphx/gpu/sigmoid.hpp

 #ifndef MIGRAPHX_GUARD_RTGLIB_SIGMOID_HPP
 #define MIGRAPHX_GUARD_RTGLIB_SIGMOID_HPP
-#include <migraphx/shape.hpp>
-#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/gpu/device/sigmoid.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-struct context;
-
-struct miopen_sigmoid
+struct hip_sigmoid : unary_device<hip_sigmoid, device::sigmoid>
 {
-    shared<activation_descriptor> ad;
-
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return gpu::reflect(self.ad.get(), f);
-    }
-
-    std::string name() const { return "gpu::sigmoid"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument compute(context& ctx,
-                     const shape& output_shape,
-                     const std::vector<argument>& args) const;
-    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
-    {
-        return shapes.size() - 1;
-    }
 };
 
 } // namespace gpu
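
miopen_sigmoid, with its MIOpen activation descriptor and hand-written compute, is replaced by hip_sigmoid, which only names the device function it wraps; unary_device itself is not shown in this diff, so presumably it supplies name, compute_shape and compute around that function. A generic sketch of that kind of function-parameterized base, with purely illustrative names:

    #include <cmath>
    #include <iostream>
    #include <string>
    #include <vector>

    using buffer = std::vector<float>;

    // Illustrative CRTP-style base: an operator defined entirely by the elementwise
    // function passed as a template parameter (loosely modelled on the unary_device
    // usage above; not the actual MIGraphX template).
    template <class Derived, float (*F)(float)>
    struct unary_op
    {
        std::string name() const { return Derived::op_name(); }
        buffer compute(const buffer& in) const
        {
            buffer out(in.size());
            for(std::size_t i = 0; i < in.size(); ++i)
                out[i] = F(in[i]);
            return out;
        }
    };

    float sigmoid_fn(float x) { return 1.0f / (1.0f + std::exp(-x)); }

    struct my_sigmoid : unary_op<my_sigmoid, &sigmoid_fn>
    {
        static std::string op_name() { return "sigmoid"; }
    };

    int main()
    {
        my_sigmoid op;
        for(float v : op.compute({-1.0f, 0.0f, 1.0f}))
            std::cout << v << " "; // ~0.269 0.5 ~0.731
        std::cout << "\n";
    }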

src/targets/gpu/lowering.cpp

@@ -86,7 +86,6 @@ struct miopen_apply
     void init()
     {
         this->last = instruction::get_output_alias(std::prev(prog->end()));
-        add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
         add_miopen_simple_op<miopen_abs>("abs", make_abs);
         add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
@@ -116,6 +115,7 @@ struct miopen_apply
         add_generic_op<hip_sqdiff>("sqdiff");
         add_generic_op<hip_relu>("relu");
         add_generic_op<hip_sign>("sign");
+        add_generic_op<hip_sigmoid>("sigmoid");
         add_extend_op<miopen_gemm, op::dot>("dot");
         add_extend_op<rocblas_quant_gemm, op::quant_dot>("quant_dot");

src/targets/gpu/quant_gemm.cpp

@@ -8,51 +8,6 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-template <class... Ts>
-rocblas_status generic_rocblas_gemm_ex(Ts&&... xs)
-{
-    return rocblas_gemm_ex(std::forward<Ts>(xs)...);
-}
-
-template <class... Ts>
-rocblas_status generic_rocblas_batched_gemm_ex(Ts&&... xs)
-{
-    return rocblas_gemm_strided_batched_ex(std::forward<Ts>(xs)...);
-}
-
-template <class T>
-struct compute_rocblas_type
-{
-    using type = T;
-};
-
-template <class T>
-struct compute_rocblas_type<const T>
-{
-    using type = const typename compute_rocblas_type<T>::type;
-};
-
-template <>
-struct compute_rocblas_type<half>
-{
-    using type = rocblas_half;
-};
-
-template <class T>
-using rb_type = typename compute_rocblas_type<T>::type;
-
-template <class T>
-rb_type<T> to_rocblas_type(T x)
-{
-    return reinterpret_cast<const rb_type<T>&>(x);
-}
-
-template <class T>
-rb_type<T>* to_rocblas_type(T* x)
-{
-    return reinterpret_cast<rb_type<T>*>(x);
-}
-
 shape rocblas_quant_gemm::compute_shape(const std::vector<shape>& inputs) const
 {
     std::vector<shape> in_shapes(inputs);
@@ -102,13 +57,13 @@ argument rocblas_quant_gemm::compute(context& ctx,
     auto a_lens = args[0].get_shape().lens();
     auto b_lens = args[1].get_shape().lens();
     output_shape.visit_type([&](auto as) {
-        auto alpha_r  = to_rocblas_type(as(op.alpha));
-        auto beta_r   = to_rocblas_type(as(beta));
+        auto alpha_r  = as(op.alpha);
+        auto beta_r   = as(beta);
         auto out_lens = output_shape.lens();
         rocblas_int m = out_lens[dim_0];
         rocblas_int n = out_lens[dim_1];
         rocblas_int k = args[0].get_shape().lens()[dim_1];
-        auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+        auto to_pointer = [&](auto&& arg) { return as.from(arg.data()); };
         assert(k % 4 == 0);
 
         auto num_matrices = std::accumulate(
@@ -119,36 +74,36 @@ argument rocblas_quant_gemm::compute(context& ctx,
             // column-major format. When doing a C = A * B, we actually do
             // C^T = (B^T) * (A^T). That is the reason we input args[1] as
             // A and args[0] as B in calling the rocblas_gemm.
-            generic_rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
+            rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
                             transb ? rocblas_operation_transpose : rocblas_operation_none,
                             transa ? rocblas_operation_transpose : rocblas_operation_none,
                             n,
                             m,
                             k,
                             &alpha_r,
                             to_pointer(args.at(1)),
                             rocblas_datatype_i8_r,
                             ldb,
                             to_pointer(args.at(0)),
                             rocblas_datatype_i8_r,
                             lda,
                             &beta_r,
                             to_pointer(args[2]),
                             rocblas_datatype_i32_r,
                             ldc,
                             is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
                             rocblas_datatype_i32_r,
                             ldc,
                             rocblas_datatype_i32_r,
                             rocblas_gemm_algo_standard,
                             0,
                             0,
                             nullptr,
                             nullptr);
         }
         else
         {
-            generic_rocblas_batched_gemm_ex(
+            rocblas_gemm_strided_batched_ex(
                 ctx.get_stream().get_rocblas(),
                 transb ? rocblas_operation_transpose : rocblas_operation_none,
                 transa ? rocblas_operation_transpose : rocblas_operation_none,
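
The comment kept in the hunk above is the key to reading the call: rocBLAS, like BLAS, treats matrices as column-major, while these buffers are row-major. Because a row-major M x N buffer is byte-for-byte a column-major N x M buffer holding the transpose, C = A * B can be computed as C^T = B^T * A^T simply by swapping which buffer is passed as A and which as B, which is why args[1] goes in the A slot. A small self-contained check of that identity (plain C++, not the rocBLAS API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Column-major GEMM on raw buffers: C(m x n) = A(m x k) * B(k x n), leading dims = rows.
    void gemm_col_major(std::size_t m, std::size_t n, std::size_t k,
                        const std::vector<int>& a, const std::vector<int>& b, std::vector<int>& c)
    {
        for(std::size_t j = 0; j < n; ++j)
            for(std::size_t i = 0; i < m; ++i)
            {
                int acc = 0;
                for(std::size_t l = 0; l < k; ++l)
                    acc += a[l * m + i] * b[j * k + l];
                c[j * m + i] = acc;
            }
    }

    int main()
    {
        // Row-major A (2x3) and B (3x2); we want row-major C (2x2) = A * B.
        std::vector<int> a = {1, 2, 3, 4, 5, 6};
        std::vector<int> b = {7, 8, 9, 10, 11, 12};
        std::vector<int> c(4);

        // A row-major 2x3 buffer is, byte for byte, a column-major 3x2 matrix, i.e. A^T.
        // Computing the column-major product C^T(2x2) = B^T(2x3) * A^T(3x2) by passing the
        // buffers swapped (b first, a second) leaves C's buffer laid out row-major, as desired.
        gemm_col_major(2, 2, 3, b, a, c);

        assert(c == (std::vector<int>{58, 64, 139, 154})); // [[58, 64], [139, 154]] row-major
        return 0;
    }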

src/targets/gpu/sigmoid.cpp (deleted, previously mode 100644)

#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_sigmoid::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).not_broadcasted();
    return inputs.at(1);
}

argument miopen_sigmoid::compute(context& ctx,
                                 const shape& output_shape,
                                 const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
                            ad.get(),
                            &alpha,
                            x_desc.get(),
                            args[0].implicit(),
                            &beta,
                            y_desc.get(),
                            args[1].implicit());
    return args[1];
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

test/gpu/miopen.cpp

@@ -3810,6 +3810,18 @@ struct test_reduce_mean : verify_program<test_reduce_mean>
     };
 };
 
+struct test_reduce_mean2 : verify_program<test_reduce_mean2>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {1, 128, 768}};
+        auto x = p.add_parameter("x", s);
+        p.add_instruction(migraphx::op::reduce_mean{{2}}, x);
+        return p;
+    };
+};
+
 struct test_reduce_mean_int : verify_program<test_reduce_mean_int>
 {
     migraphx::program create_program() const