##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import threading

import torch
import torch.nn.functional as F
from torch.autograd import Function, Variable

__all__ = ['squeeze_each', 'view_each', 'multi_each', 'sum_each', 'cat_each',
           'upsample', 'dropout', 'relu']


def squeeze_each(x, dim=None):
    """Multi-GPU version of torch.squeeze()"""
    y = []
    for i in range(len(x)):
        if dim is None:
            y.append(x[i].squeeze())
        else:
            y.append(x[i].squeeze(dim))
    return y


def view_each(x, size):
    """Multi-GPU version of Tensor.view()

    Returns a new tensor for each input with the same data but a different
    size. Each returned tensor shares its data with the input and must have
    the same number of elements, but may have a different size. A tensor
    must be :attr:`contiguous` to be viewed.

    Args:
        x: list of multi-GPU tensors
        size (torch.Size or int...): desired size
    """
    y = []
    for i in range(len(x)):
        y.append(x[i].view(size))
    return y


def multi_each(a, b):
    """Multi-GPU version of elementwise multiplication

    .. math::
        y[i] = a[i] * b[i]
    """
    y = []
    for i in range(len(a)):
        y.append(a[i] * b[i])
    return y


def sum_each(x, y):
    """Multi-GPU version of torch.add

    .. math::
        z[i] = x[i] + y[i]
    """
    assert len(x) == len(y)
    z = []
    for i in range(len(x)):
        z.append(x[i] + y[i])
    return z


def cat_each(x1, x2, dim):
    """Multi-GPU version of torch.cat

    .. math::
        y[i] = torch.cat((x1[i], x2[i]), dim)
    """
    assert len(x1) == len(x2)
    z = []
    for i in range(len(x1)):
        with torch.cuda.device_of(x1[i]):
            x = torch.cat((x1[i], x2[i]), dim)
        z.append(x)
    return z


def dict_to_list(x):
    """Convert the {index: result} dict filled by the worker threads into an
    ordered list, re-raising any exception that a worker recorded."""
    y = []
    for i in range(len(x)):
        xi = x[i]
        if isinstance(xi, Exception):
            raise xi
        y.append(xi)
    return y
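# Example usage of the elementwise helpers above (a minimal sketch, not part
# of the original source). It assumes the inputs have already been scattered
# to the available devices, e.g. by torch.nn.parallel.scatter; plain CPU
# tensors also work, since torch.cuda.device_of is a no-op for them:
#
#   >>> xs = [Variable(torch.randn(2, 3)) for _ in range(2)]
#   >>> ys = view_each(xs, (3, 2))    # reshape each replica
#   >>> zs = sum_each(xs, xs)         # elementwise add, replica by replica
#   >>> cs = cat_each(xs, xs, dim=1)  # per-replica concatenation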
def upsample(input, size=None, scale_factor=None, mode='nearest'):
    """Multi-GPU version of torch.nn.functional.upsample

    Upsamples the input to either the given :attr:`size` or the given
    :attr:`scale_factor`. The algorithm used for upsampling is determined
    by :attr:`mode`. Currently temporal, spatial and volumetric upsampling
    are supported, i.e. expected inputs are 3-D, 4-D or 5-D in shape. The
    input dimensions are interpreted in the form:
    `mini-batch x channels x [depth] x [height] x width`.
    The modes available for upsampling are: `nearest`, `linear` (3D-only),
    `bilinear` (4D-only), `trilinear` (5D-only).

    Args:
        input (Variable): input
        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
            output spatial size.
        scale_factor (int): multiplier for spatial size. Has to be an
            integer.
        mode (string): algorithm used for upsampling:
            'nearest' | 'linear' | 'bilinear' | 'trilinear'.
            Default: 'nearest'
    """
    if isinstance(input, Variable):
        return F.upsample(input, size=size, scale_factor=scale_factor,
                          mode=mode)
    elif isinstance(input, (tuple, list)):
        lock = threading.Lock()
        results = {}

        def _worker(i, x):
            try:
                with torch.cuda.device_of(x):
                    result = F.upsample(x, size=size,
                                        scale_factor=scale_factor, mode=mode)
                with lock:
                    results[i] = result
            except Exception as e:
                with lock:
                    results[i] = e

        # one thread per input, each running on its own GPU
        threads = [threading.Thread(target=_worker, args=(i, x))
                   for i, x in enumerate(input)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        outputs = dict_to_list(results)
        return outputs
    else:
        raise RuntimeError('unknown input type')


def dropout(input, p=0.5, training=False, inplace=True):
    """Multi-GPU version of torch.nn.functional.dropout

    The elements to zero out are randomized on every forward call.
    *Usually the input comes from Conv2d modules.*

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then iid dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease. In this case,
    :func:`nn.Dropout2d` will help promote independence between feature
    maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed.
        inplace (bool, optional): if set to True, will do this operation
            in-place

    Shape:
        - Input: :math:`(N, C, H, W)`
        - Output: :math:`(N, C, H, W)` (same shape as input)
    """
    if isinstance(input, Variable):
        return F.dropout(input, p, training, inplace)
    elif isinstance(input, (tuple, list)):
        lock = threading.Lock()
        results = {}

        def _worker(i, x):
            try:
                with torch.cuda.device_of(x):
                    result = F.dropout(x, p, training, inplace)
                with lock:
                    results[i] = result
            except Exception as e:
                with lock:
                    results[i] = e

        # one thread per input, each running on its own GPU
        threads = [threading.Thread(target=_worker, args=(i, x))
                   for i, x in enumerate(input)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        outputs = dict_to_list(results)
        return outputs
    else:
        raise RuntimeError('unknown input type')


def relu(input, inplace=False):
    """Multi-GPU version of torch.nn.functional.relu

    Applies the rectified linear unit function element-wise:
    :math:`{ReLU}(x) = max(0, x)`

    Args:
        inplace: can optionally do the operation in-place. Default: False

    Shape:
        - Input: :math:`(N, *)` where `*` means any number of additional
          dimensions
        - Output: :math:`(N, *)`, same shape as the input
    """
    if isinstance(input, Variable):
        return F.relu(input, inplace)
    elif isinstance(input, (tuple, list)):
        lock = threading.Lock()
        results = {}

        def _worker(i, x):
            try:
                with torch.cuda.device_of(x):
                    result = F.relu(x, inplace)
                with lock:
                    results[i] = result
            except Exception as e:
                with lock:
                    results[i] = e

        # one thread per input, each running on its own GPU
        threads = [threading.Thread(target=_worker, args=(i, x))
                   for i, x in enumerate(input)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        outputs = dict_to_list(results)
        return outputs
    else:
        raise RuntimeError('unknown input type')
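
# Minimal smoke test (a sketch, not part of the original module). It runs on
# CPU as well, since torch.cuda.device_of is a no-op for CPU tensors; on a
# multi-GPU machine, move each list entry to its own device first.
if __name__ == '__main__':
    xs = [Variable(torch.randn(1, 4, 8, 8)) for _ in range(2)]
    ys = upsample(xs, scale_factor=2, mode='nearest')
    ys = relu(ys)
    ys = dropout(ys, p=0.5, training=True, inplace=False)
    print([tuple(y.size()) for y in ys])  # expected: [(1, 4, 16, 16)] * 2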