Fixes flake8 --select W605 test warnings (#829)

Signed-off-by: asears <asears@users.noreply.github.com>

Fixes flake8 --select W605 test warnings (#829)
Signed-off-by: asears <asears@users.noreply.github.com>
9165b27f · Andrew Sears · GitHub · e1b7997a · 9165b27f · 9165b27f
Unverified commit 9165b27f, authored May 13, 2020 by Andrew Sears; committed by GitHub on May 13, 2020.
8 changed files
--- a/apex/contrib/optimizers/fused_adam.py
+++ b/apex/contrib/optimizers/fused_adam.py
@@ -29,7 +29,7 @@ class FusedAdam(torch.optim.Optimizer):
        use_mt (boolean, optional): use multi tensor apply for lower launch
            latency. (default: False)

-    .. _Adam\: A Method for Stochastic Optimization:
+    .. _Adam - A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

--- a/apex/contrib/optimizers/fused_lamb.py
+++ b/apex/contrib/optimizers/fused_lamb.py
@@ -54,7 +54,7 @@ class FusedLAMB(torch.optim.Optimizer):
        max_grad_norm (float, optional): value used to clip global grad norm
            (default: 1.0)

-    .. _Large Batch Optimization for Deep Learning\: Training BERT in 76 minutes:
+    .. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
        https://arxiv.org/abs/1904.00962
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

--- a/apex/optimizers/fused_adam.py
+++ b/apex/optimizers/fused_adam.py
@@ -53,7 +53,7 @@ class FusedAdam(torch.optim.Optimizer):
        set_grad_none (bool, optional): whether set grad to None when zero_grad()
            method is called. (default: True)

-    .. _Adam\: A Method for Stochastic Optimization:
+    .. _Adam - A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

--- a/apex/optimizers/fused_lamb.py
+++ b/apex/optimizers/fused_lamb.py
@@ -52,7 +52,7 @@ class FusedLAMB(torch.optim.Optimizer):
        max_grad_norm (float, optional): value used to clip global grad norm
            (default: 1.0)

-    .. _Large Batch Optimization for Deep Learning\: Training BERT in 76 minutes:
+    .. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
        https://arxiv.org/abs/1904.00962
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

--- a/apex/optimizers/fused_novograd.py
+++ b/apex/optimizers/fused_novograd.py
@@ -58,7 +58,7 @@ class FusedNovoGrad(torch.optim.Optimizer):
        set_grad_none (bool, optional): whether set grad to None when zero_grad()
            method is called. (default: True)

-    .. _Jasper\: An End-to-End Convolutional Neural Acoustic Model:
+    .. _Jasper - An End-to-End Convolutional Neural Acoustic Model:
        https://arxiv.org/abs/1904.03288
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

--- a/apex/reparameterization/__init__.py
+++ b/apex/reparameterization/__init__.py
@@ -2,7 +2,7 @@ from .weight_norm import WeightNorm
 from .reparameterization import Reparameterization

 def apply_weight_norm(module, name='', dim=0, hook_child=True):
-    """
+    r"""
    Applies weight normalization to a parameter in the given module.
    If no parameter is provided, applies weight normalization to all
    parameters in model (except 1-d vectors and scalars).

--- a/apex/reparameterization/weight_norm.py
+++ b/apex/reparameterization/weight_norm.py
@@ -20,7 +20,7 @@ def _norm(p, dim):
 HALF_TYPES = (torch.cuda.HalfTensor, torch.HalfTensor)

 class WeightNorm(Reparameterization):
-    """
+    r"""
    Weight normalization is a reparameterization that decouples the magnitude
    of a weight tensor from its direction. This replaces the parameter specified
    by `name` (e.g. "weight") with two parameters: one specifying the magnitude

--- a/requirements_dev.txt
+++ b/requirements_dev.txt
+-r requirements.txt
+flake8>=3.7.9
+Sphinx>=3.0.3
\ No newline at end of file