Unverified Commit 9165b27f authored by Andrew Sears, committed by GitHub

Fixes flake8 --select W605 test warnings (#829)


Signed-off-by: asears <asears@users.noreply.github.com>
parent e1b7997a
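
flake8's W605 check flags invalid escape sequences (here the "\:" used to escape colons in Sphinx link targets) inside ordinary, non-raw string literals such as docstrings. The hunks below apply two fixes: the fused-optimizer docstrings reword the link labels so no backslash is needed, and the weight-norm docstrings are marked as raw strings. A minimal sketch of both patterns, with hypothetical function names chosen for illustration:

def adam_link_reworded():
    """Fix 1: reword the Sphinx link target so the colon no longer needs escaping.

    .. _Adam - A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    """


def adam_link_raw():
    r"""Fix 2: a raw docstring keeps the backslash literal, so W605 is not raised.

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    """

Either form silences flake8 --select W605 for these lines; the commit uses the first for the optimizer docstrings and the second for the weight-norm module.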
@@ -29,7 +29,7 @@ class FusedAdam(torch.optim.Optimizer):
         use_mt (boolean, optional): use multi tensor apply for lower launch
             latency. (default: False)
 
-    .. _Adam\: A Method for Stochastic Optimization:
+    .. _Adam - A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
@@ -54,7 +54,7 @@ class FusedLAMB(torch.optim.Optimizer):
         max_grad_norm (float, optional): value used to clip global grad norm
             (default: 1.0)
 
-    .. _Large Batch Optimization for Deep Learning\: Training BERT in 76 minutes:
+    .. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
        https://arxiv.org/abs/1904.00962
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
@@ -53,7 +53,7 @@ class FusedAdam(torch.optim.Optimizer):
         set_grad_none (bool, optional): whether set grad to None when zero_grad()
             method is called. (default: True)
 
-    .. _Adam\: A Method for Stochastic Optimization:
+    .. _Adam - A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
@@ -52,7 +52,7 @@ class FusedLAMB(torch.optim.Optimizer):
         max_grad_norm (float, optional): value used to clip global grad norm
             (default: 1.0)
 
-    .. _Large Batch Optimization for Deep Learning\: Training BERT in 76 minutes:
+    .. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
        https://arxiv.org/abs/1904.00962
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
@@ -58,7 +58,7 @@ class FusedNovoGrad(torch.optim.Optimizer):
         set_grad_none (bool, optional): whether set grad to None when zero_grad()
             method is called. (default: True)
 
-    .. _Jasper\: An End-to-End Convolutional Neural Acoustic Model:
+    .. _Jasper - An End-to-End Convolutional Neural Acoustic Model:
        https://arxiv.org/abs/1904.03288
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
@@ -2,7 +2,7 @@ from .weight_norm import WeightNorm
 from .reparameterization import Reparameterization
 
 def apply_weight_norm(module, name='', dim=0, hook_child=True):
-    """
+    r"""
     Applies weight normalization to a parameter in the given module.
     If no parameter is provided, applies weight normalization to all
     parameters in model (except 1-d vectors and scalars).
@@ -20,7 +20,7 @@ def _norm(p, dim):
 HALF_TYPES = (torch.cuda.HalfTensor, torch.HalfTensor)
 
 class WeightNorm(Reparameterization):
-    """
+    r"""
     Weight normalization is a reparameterization that decouples the magnitude
     of a weight tensor from its direction. This replaces the parameter specified
     by `name` (e.g. "weight") with two parameters: one specifying the magnitude
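
The weight-norm hunks take the raw-docstring route rather than rewording, which keeps any backslashes in the text literal for Sphinx. A hedged, illustrative-only sketch (not the project's actual docstring) of why that matters when a docstring carries math notation:

class WeightNormSketch:
    r"""Illustrative only: with a raw docstring the backslashes below stay
    literal, so flake8 raises no W605 and Sphinx still receives the markup.

    .. math::
        \mathbf{w} = g \frac{\mathbf{v}}{\|\mathbf{v}\|}
    """

The formula is the standard weight-normalization reparameterization (magnitude g times the direction of v), matching the description in the hunk above.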
-r requirements.txt
flake8>=3.7.9
Sphinx>=3.0.3
\ No newline at end of file