Commit 05b808ef authored by Jared Casper
Browse files

Expand on apply-layernorm-1p description a bit.

parent 93bed794
...@@ -515,7 +515,8 @@ def _add_network_size_args(parser): ...@@ -515,7 +515,8 @@ def _add_network_size_args(parser):
group.add_argument('--layernorm-epsilon', type=float, default=1e-5, group.add_argument('--layernorm-epsilon', type=float, default=1e-5,
help='Layer norm epsilon.') help='Layer norm epsilon.')
group.add_argument('--apply-layernorm-1p', action='store_true', group.add_argument('--apply-layernorm-1p', action='store_true',
help='Weight adjustment centered around zero.') help='Adjust LayerNorm weights such that they are centered '
'around zero. This improves numerical stability.')
group.add_argument('--apply-residual-connection-post-layernorm', group.add_argument('--apply-residual-connection-post-layernorm',
action='store_true', action='store_true',
help='If set, use original BERT residula connection ' help='If set, use original BERT residula connection '
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment