Commit 2e3e5219 authored by Hang Zhang

pylint

parent 4dcec47d
@@ -10,7 +10,7 @@ Created by `Hang Zhang <http://hangzh.com/>`_
 An optimized PyTorch package with CUDA backend.
 .. note::
-    PyTorch compatible Synchronized Cross-GPU :class:`encoding.nn.BatchNorm2d` has been released. Please checkout the
+    PyTorch compatible Synchronized Cross-GPU :class:`encoding.nn.BatchNorm2d` and the `mnist example <https://github.com/zhanghang1989/PyTorch-SyncBatchNorm>`_.
 .. toctree::
    :glob:
...
@@ -53,7 +53,7 @@ Suppose we have :math:`K` number of GPUs, :math:`sum(x)_k` and :math:`sum(x^2)_k
 * :math:`\frac{d_\ell}{d_{x_i}}=\frac{d_\ell}{d_{y_i}}\frac{\gamma}{\sigma}` can be calculated locally in each GPU.
 * Calculate the gradient of :math:`sum(x)` and :math:`sum(x^2)` individually in each GPU :math:`\frac{d_\ell}{d_{sum(x)_k}}` and :math:`\frac{d_\ell}{d_{sum(x^2)_k}}`.
-* Then Sync the gradient (automatically handled by :class:`encoding.parallel.AllReduce`) and continue the backward.
+* Then Sync the gradient (automatically handled by :class:`encoding.parallel.allreduce`) and continue the backward.
 .. image:: http://hangzh.com/blog/images/bn3.png
    :align: center
...
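A minimal sketch of the statistics/sync step described in the bullets of that hunk, written in plain NumPy so it stays self-contained. The function name and shapes are illustrative only; this is not the CUDA implementation in this package.

# Each "GPU" k contributes sum(x)_k and sum(x^2)_k, the sums are all-reduced so
# every device sees the same totals, and each device then normalizes its own
# shard with the shared mean/var.
import numpy as np

def sync_bn_stats(shards, eps=1e-5):
    # shards: list of per-device activations, each of shape (N_k, C)
    count = sum(x.shape[0] for x in shards)
    per_dev_sum = [x.sum(axis=0) for x in shards]           # sum(x)_k
    per_dev_sqsum = [(x * x).sum(axis=0) for x in shards]   # sum(x^2)_k

    # the "all-reduce": every device ends up with the global sums
    total_sum = np.sum(per_dev_sum, axis=0)
    total_sqsum = np.sum(per_dev_sqsum, axis=0)

    mean = total_sum / count
    var = total_sqsum / count - mean ** 2                   # E[x^2] - E[x]^2
    inv_std = 1.0 / np.sqrt(var + eps)

    # each device normalizes its own shard with the shared statistics
    return [(x - mean) * inv_std for x in shards], mean, var

shards = [np.random.randn(8, 4) for _ in range(2)]          # two fake "GPUs"
normalized, mean, var = sync_bn_stats(shards)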
@@ -14,14 +14,16 @@ import torch
 from torch.nn import Module, Sequential, Conv1d, Conv2d, ConvTranspose2d, \
     ReLU, Sigmoid, MaxPool2d, AvgPool2d, AdaptiveAvgPool2d, Dropout2d, Linear
 from torch.nn.modules.batchnorm import _BatchNorm
+from torch.nn.parameter import Parameter
 from ..functions import batchnormtrain, batchnormeval, sum_square
 from ..parallel import allreduce
-#__all__ = ['BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d']
+__all__ = ['BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'Module', 'Sequential', 'Conv1d',
+           'Conv2d', 'ConvTranspose2d', 'ReLU', 'Sigmoid', 'MaxPool2d', 'AvgPool2d',
+           'AdaptiveAvgPool2d', 'Dropout2d', 'Linear']
 class _SyncBatchNorm(_BatchNorm):
+    # pylint: disable=access-member-before-definition
     def __init__(self, num_features, eps=1e-5, momentum=0.1, **kwargs):
         super(_SyncBatchNorm, self).__init__(num_features, eps=1e-5, momentum=0.1, **kwargs)
         # syncBN
...
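One note on the unchanged constructor shown in this hunk: the super().__init__ call re-hardcodes eps=1e-5 and momentum=0.1 instead of forwarding the values the caller passed in. A sketch of the usual pattern, not part of this commit:

class _SyncBatchNorm(_BatchNorm):
    # pylint: disable=access-member-before-definition
    def __init__(self, num_features, eps=1e-5, momentum=0.1, **kwargs):
        # forward the caller's eps/momentum instead of repeating the defaults
        super(_SyncBatchNorm, self).__init__(num_features, eps=eps,
                                             momentum=momentum, **kwargs)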
@@ -13,15 +13,12 @@ import threading
 import torch
 from torch.autograd import Function
 import torch.cuda.comm as comm
-from torch.autograd import Variable
-from torch.nn.modules import Module
 from torch.nn.parallel.data_parallel import DataParallel
-from torch.nn.parallel.scatter_gather import scatter_kwargs
 from torch.nn.parallel.replicate import replicate
 from torch.nn.parallel.parallel_apply import get_a_var
 from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
-__all__ = ['allreduce', 'ModelDataParallel', 'CriterionDataParallel']
+__all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion']
 def allreduce(num_inputs, *inputs):
@@ -162,4 +159,3 @@ def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices
         raise output
     outputs.append(output)
     return outputs
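For reference, a usage sketch of the renamed exports, assuming they keep the DataParallel-style interface this package documents. The toy model, data, and device ids are placeholders; running it requires at least two GPUs and an installed encoding package.

import torch
import torch.nn as nn
from encoding.parallel import DataParallelModel, DataParallelCriterion

# toy model and data, stand-ins for a real training setup
model = nn.Sequential(nn.Linear(10, 10), nn.ReLU(), nn.Linear(10, 2)).cuda()
criterion = nn.CrossEntropyLoss()

model = DataParallelModel(model, device_ids=[0, 1])              # replicate model, scatter inputs
criterion = DataParallelCriterion(criterion, device_ids=[0, 1])  # evaluate loss per GPU, then reduce

inputs = torch.randn(8, 10).cuda()
targets = torch.randint(0, 2, (8,)).cuda()

outputs = model(inputs)             # per-GPU outputs stay on their own devices
loss = criterion(outputs, targets)  # targets are scattered to match the outputs
loss.backward()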