Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
95665ff2
Commit
95665ff2
authored
Aug 29, 2022
by
Khalique Ahmed
Browse files
take in epsilon as parameter
parent
928da1f0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
25 additions
and
13 deletions
+25
-13
src/include/migraphx/match/layernorm.hpp
src/include/migraphx/match/layernorm.hpp
+1
-2
src/targets/gpu/jit/layernorm.cpp
src/targets/gpu/jit/layernorm.cpp
+4
-2
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
...argets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
+8
-7
src/targets/gpu/prefuse_ops.cpp
src/targets/gpu/prefuse_ops.cpp
+12
-2
No files found.
src/include/migraphx/match/layernorm.hpp
View file @
95665ff2
...
@@ -51,8 +51,7 @@ struct layernorm_matcher
...
@@ -51,8 +51,7 @@ struct layernorm_matcher
return
f
(
"div"
)(
arg
(
0
)(
x_minus_mean
()),
return
f
(
"div"
)(
arg
(
0
)(
x_minus_mean
()),
arg
(
1
)(
skip_broadcasts
(
f
(
"sqrt"
)(
arg
(
0
)(
f
(
"add"
)(
arg
(
1
)(
skip_broadcasts
(
f
(
"sqrt"
)(
arg
(
0
)(
f
(
"add"
)(
either_arg
(
0
,
1
)(
variance
(),
is_constant
())))))));
// 71.7596/sec
either_arg
(
0
,
1
)(
variance
(),
is_constant
().
bind
(
"eps"
))))))));
// arg(0)(f("add")(either_arg(0, 1)(variance(), has_value(1e-12f)))))))); // 70.8157/sec
}
}
// Matcher-framework entry point: this struct matches the ONNX-style
// layernorm instruction pattern (div(x - mean, sqrt(variance + eps))).
auto matcher() const { return layernorm_onnx(); }
...
...
src/targets/gpu/jit/layernorm.cpp
View file @
95665ff2
...
@@ -54,7 +54,7 @@ __global__ void ${kernel}(${params})
...
@@ -54,7 +54,7 @@ __global__ void ${kernel}(${params})
{
{
auto idx = make_index();
auto idx = make_index();
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
${layernorm}<${axis}>(${post}, xs...);
${layernorm}<${axis}>(${post},
${eps},
xs...);
});
});
}
}
...
@@ -92,6 +92,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
...
@@ -92,6 +92,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
options
.
output
=
inputs
.
back
();
options
.
output
=
inputs
.
back
();
options
.
inputs
=
inputs
;
options
.
inputs
=
inputs
;
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"layernorm_kernel"
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"layernorm_kernel"
);
auto
eps
=
v
.
get
(
"epsilon"
,
1e-12
f
);
auto
src
=
interpolate_string
(
layernorm_kernel
,
auto
src
=
interpolate_string
(
layernorm_kernel
,
{{
"kernel"
,
options
.
kernel_name
},
{{
"kernel"
,
options
.
kernel_name
},
...
@@ -101,7 +102,8 @@ struct layernorm_compiler : compiler<layernorm_compiler>
...
@@ -101,7 +102,8 @@ struct layernorm_compiler : compiler<layernorm_compiler>
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"layernorm"
,
v
.
get
(
"layernorm"
,
std
::
string
{
"layernorm"
})},
{
"layernorm"
,
v
.
get
(
"layernorm"
,
std
::
string
{
"layernorm"
})},
{
"axis"
,
to_string
(
axis
)}});
{
"axis"
,
to_string
(
axis
)},
{
"eps"
,
to_string
(
eps
)}});
return
compile_hip_code_object
(
src
,
options
);
return
compile_hip_code_object
(
src
,
options
);
}
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
View file @
95665ff2
...
@@ -43,7 +43,7 @@ template <index_int Axis,
...
@@ -43,7 +43,7 @@ template <index_int Axis,
class
Input2
,
class
Input2
,
class
...
Inputs
>
class
...
Inputs
>
__device__
void
generic_binary_layernorm
(
__device__
void
generic_binary_layernorm
(
F
compute
,
BinOp
op
,
Output
output
,
Input1
input1
,
Input2
input2
,
Inputs
...
inputs
)
F
compute
,
BinOp
op
,
float
eps
,
Output
output
,
Input1
input1
,
Input2
input2
,
Inputs
...
inputs
)
{
{
using
reduce_output
=
reduce
::
with_axis
<
Input1
,
Axis
>
;
using
reduce_output
=
reduce
::
with_axis
<
Input1
,
Axis
>
;
reduce
::
block
::
run
<
reduce_output
>
([
&
](
auto
,
auto
r
)
{
reduce
::
block
::
run
<
reduce_output
>
([
&
](
auto
,
auto
r
)
{
...
@@ -58,29 +58,30 @@ __device__ void generic_binary_layernorm(
...
@@ -58,29 +58,30 @@ __device__ void generic_binary_layernorm(
auto
mean_x
=
means
[
0
];
auto
mean_x
=
means
[
0
];
auto
mean_x2
=
means
[
1
];
auto
mean_x2
=
means
[
1
];
auto
variance
=
mean_x2
-
(
mean_x
*
mean_x
);
auto
variance
=
mean_x2
-
(
mean_x
*
mean_x
);
auto
eps_val
=
static_cast
<
value_type
>
(
eps
);
r
.
inner
([
&
](
auto
&
y
,
auto
x1
,
auto
x2
,
auto
...
xs
)
{
r
.
inner
([
&
](
auto
&
y
,
auto
x1
,
auto
x2
,
auto
...
xs
)
{
auto
x
=
op
(
x1
,
x2
);
auto
x
=
op
(
x1
,
x2
);
auto
m
=
x
-
mean_x
;
auto
m
=
x
-
mean_x
;
// m * rsqrt(mean(m ^ 2) +
1.00136e-05
)
// m * rsqrt(mean(m ^ 2) +
epsilon
)
y
=
compute
(
m
*
rsqrt
(
variance
+
value_type
{
1.00136e-05
}
),
xs
...);
y
=
compute
(
m
*
rsqrt
(
variance
+
eps_val
),
xs
...);
})(
output
,
input1
,
input2
,
inputs
...);
})(
output
,
input1
,
input2
,
inputs
...);
});
});
}
}
// Layernorm over a single input tensor along `Axis`.
//
// Thin wrapper over generic_binary_layernorm: the binary op forwards its
// first operand unchanged and `input` is passed twice, so the second copy
// is a dummy — this computes a plain layernorm of `input`.
//
// Parameters:
//   compute - epilogue applied to each normalized element (for post-op fusion)
//   eps     - epsilon added to the variance before rsqrt; taken as a runtime
//             parameter instead of the previously hard-coded constant
//   output  - destination tensor
//   input   - source tensor
//   inputs  - extra tensors forwarded to `compute`
template <index_int Axis, class F, class Output, class Input, class... Inputs>
__device__ void layernorm(F compute, float eps, Output output, Input input, Inputs... inputs)
{
    generic_binary_layernorm<Axis>(
        compute, [](auto x, auto) { return x; }, eps, output, input, input, inputs...);
}
// Fused add + layernorm: normalizes (input1 + input2) along `Axis`.
//
// Delegates to generic_binary_layernorm with element-wise addition as the
// binary op, so the sum is formed on the fly inside the reduction instead of
// materializing an intermediate tensor.
//
// Parameters:
//   compute - epilogue applied to each normalized element (for post-op fusion)
//   eps     - epsilon added to the variance before rsqrt (runtime parameter)
//   output  - destination tensor
//   input1  - first addend
//   input2  - second addend
//   inputs  - extra tensors forwarded to `compute`
template <index_int Axis, class F, class Output, class Input1, class Input2, class... Inputs>
__device__ void
add_layernorm(F compute, float eps, Output output, Input1 input1, Input2 input2, Inputs... inputs)
{
    generic_binary_layernorm<Axis>(
        compute, [](auto x1, auto x2) { return x1 + x2; }, eps, output, input1, input2, inputs...);
}
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/prefuse_ops.cpp
View file @
95665ff2
...
@@ -34,6 +34,13 @@ namespace {
...
@@ -34,6 +34,13 @@ namespace {
template
<
class
Derived
,
std
::
size_t
N
>
template
<
class
Derived
,
std
::
size_t
N
>
struct
layernorm_base
struct
layernorm_base
{
{
float
epsilon
=
1e-12
f
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
epsilon
,
"epsilon"
));
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
std
::
vector
<
module_ref
>
mods
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
std
::
vector
<
module_ref
>
mods
)
const
{
{
std
::
size_t
nargs
=
1
;
std
::
size_t
nargs
=
1
;
...
@@ -61,6 +68,7 @@ struct layernorm_base
...
@@ -61,6 +68,7 @@ struct layernorm_base
// Prefused layernorm operator inserted by the layernorm matcher pass.
// The epsilon value is carried by the layernorm_base member (reflected for
// serialization); N = 0 means no extra fused inputs beyond x.
struct layernorm : layernorm_base<layernorm, 0>
{
    std::string name() const { return "gpu::prelayernorm"; }
};
MIGRAPHX_REGISTER_OP(layernorm);
...
@@ -79,8 +87,9 @@ struct find_layernorm
...
@@ -79,8 +87,9 @@ struct find_layernorm
{
{
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
m
.
replace_instruction
(
ins
,
layernorm
{},
x_ins
);
m
.
replace_instruction
(
ins
,
layernorm
{
eps
},
x_ins
);
}
}
};
};
...
@@ -95,8 +104,9 @@ struct find_add_layernorm
...
@@ -95,8 +104,9 @@ struct find_add_layernorm
{
{
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
m
.
replace_instruction
(
ins
,
add_layernorm
{},
add_ins
->
inputs
());
m
.
replace_instruction
(
ins
,
add_layernorm
{
eps
},
add_ins
->
inputs
());
}
}
};
};
}
// namespace
}
// namespace
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment