Commit c142714b authored by Michael Carilli

Merging in latest master changes

parents b620f96b e6eec3ba
import torch.nn as nn


class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError("""An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (weight.new(self.nlayers, bsz, self.nhid).zero_(),
                    weight.new(self.nlayers, bsz, self.nhid).zero_())
        else:
            return weight.new(self.nlayers, bsz, self.nhid).zero_()
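
For readers skimming the commit, a minimal usage sketch of the model above (the vocabulary size, dimensions, and sequence/batch sizes are made-up illustration values, not anything from this commit):

```
import torch

# Hypothetical illustration values.
ntoken, ninp, nhid, nlayers = 1000, 200, 200, 2
seq_len, batch_size = 35, 20

model = RNNModel('LSTM', ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=True)
hidden = model.init_hidden(batch_size)                    # (h_0, c_0) for an LSTM
tokens = torch.randint(0, ntoken, (seq_len, batch_size))  # (seq_len, batch) token ids
output, hidden = model(tokens, hidden)                    # output: (seq_len, batch, ntoken)
```

Note that `tie_weights=True` requires `ninp == nhid`, since the decoder's weight matrix is shared with the `ntoken x ninp` embedding.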
 # Base image must at least have pytorch and CUDA installed.
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:19.01-py3
+ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:19.03-py3
 FROM $BASE_IMAGE
 ARG BASE_IMAGE
 RUN echo "Installing Apex on top of ${BASE_IMAGE}"
-WORKDIR /workspace
-# uninstall Apex if present
+# make sure we don't overwrite some existing directory called "apex"
+WORKDIR /tmp/unique_for_apex
+# uninstall Apex if present, twice to make absolutely sure :)
+RUN pip uninstall -y apex || :
 RUN pip uninstall -y apex || :
 # SHA is something the user can touch to force recreation of this Docker layer,
 # and therefore force cloning of the latest version of Apex
 RUN SHA=ToUcHMe git clone https://github.com/NVIDIA/apex.git
-WORKDIR /workspace/apex
-RUN python setup.py install --cuda_ext --cpp_ext
+WORKDIR /tmp/unique_for_apex/apex
+RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
 WORKDIR /workspace
@@ -2,22 +2,31 @@
 **Dockerfile** installs the latest Apex on top of an existing image. Run
 ```
-docker build -t image_with_apex .
+docker build -t new_image_with_apex .
 ```
 By default, **Dockerfile** uses NVIDIA's Pytorch container as the base image,
 which requires an NVIDIA GPU Cloud (NGC) account. If you don't have an NGC account, you can sign up for free by following the instructions [here](https://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html#generating-api-key).
 Alternatively, you can supply your own base image via the `BASE_IMAGE` build-arg.
-Any `BASE_IMAGE` you supply must have Pytorch and Cuda installed, for example:
+`BASE_IMAGE` must have Pytorch and Cuda installed. For example, any
+`-devel` image for Pytorch 1.0 and later from the
+[official Pytorch Dockerhub](https://hub.docker.com/r/pytorch/pytorch) may be used:
 ```
-docker build --build-arg BASE_IMAGE=pytorch/pytorch:0.4-cuda9-cudnn7-devel -t image_with_apex .
+docker build --build-arg BASE_IMAGE=pytorch/pytorch:nightly-devel-cuda10.0-cudnn7 -t new_image_with_apex .
 ```
 If you want to rebuild your image, and force the latest Apex to be cloned and installed, make any small change to the `SHA` variable in **Dockerfile**.
 **Warning:**
-Currently, Pytorch's default non-devel image on Dockerhub
-[pytorch/pytorch:0.4_cuda9_cudnn7](https://hub.docker.com/r/pytorch/pytorch/tags/) contains Pytorch installed with prebuilt binaries. It does not contain NVCC, which means it is not an eligible candidate for `<base image>`.
+Currently, the non-`-devel` images on Pytorch Dockerhub do not contain the Cuda compiler `nvcc`. Therefore,
+images whose names do not contain `-devel` are not eligible candidates for `BASE_IMAGE`.
+
+### Running your Apex container
+Like any Cuda-enabled Pytorch container, a container with Apex should be run via [nvidia-docker](https://github.com/NVIDIA/nvidia-docker), for example:
+```
+docker run --runtime=nvidia -it --rm --ipc=host new_image_with_apex
+```
 ## Option 2: Install Apex in a running container
@@ -25,4 +34,7 @@ Instead of building a new container, it is also a viable option to `git clone ht
 ```
 docker run --runtime=nvidia -it --rm --ipc=host -v /bare/metal/apex:/apex/in/container <base image>
 ```
-then go to /apex/in/container within the running container and `python setup.py install [--cuda_ext] [--cpp_ext]`.
+then go to /apex/in/container within the running container and
+```
+pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
+```
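
Whichever option is used, a quick hedged sanity check that the install succeeded is to import the package inside the container (this only verifies the Python packaging step; exercising the Cuda extensions still requires a GPU):

```
# Run inside the container after installing Apex.
import apex
from apex import amp

print("apex imported from", apex.__file__)
```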
@@ -82,7 +82,6 @@ def fast_collate(batch):
     tensor = torch.zeros( (len(imgs), 3, h, w), dtype=torch.uint8 )
     for i, img in enumerate(imgs):
         nump_array = np.asarray(img, dtype=np.uint8)
-        tens = torch.from_numpy(nump_array)
         if(nump_array.ndim < 3):
             nump_array = np.expand_dims(nump_array, axis=-1)
         nump_array = np.rollaxis(nump_array, 2)
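
The removed `tens = torch.from_numpy(nump_array)` was dead code: its result was never used, and the conversion still happens where the array is written into the preallocated batch tensor. For context, a sketch of the full collate function this hunk belongs to (reconstructed for illustration; only the lines shown in the hunk are guaranteed to match the source):

```
import numpy as np
import torch

def fast_collate(batch):
    # batch: list of (PIL.Image, target) pairs. Collate to uint8 without
    # normalizing, leaving float conversion to the GPU side of the pipeline.
    imgs = [sample[0] for sample in batch]
    targets = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)
    w, h = imgs[0].size
    tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
    for i, img in enumerate(imgs):
        nump_array = np.asarray(img, dtype=np.uint8)
        if nump_array.ndim < 3:
            nump_array = np.expand_dims(nump_array, axis=-1)
        nump_array = np.rollaxis(nump_array, 2)    # HWC -> CHW
        tensor[i] += torch.from_numpy(nump_array)  # conversion happens here
    return tensor, targets
```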
@@ -190,8 +189,9 @@ def main():
     valdir = os.path.join(args.data, 'val')
     if(args.arch == "inception_v3"):
-        crop_size = 299
-        val_size = 320 # I chose this value arbitrarily, we can adjust.
+        raise RuntimeError("Currently, inception_v3 is not supported by this example.")
+        # crop_size = 299
+        # val_size = 320 # I chose this value arbitrarily, we can adjust.
     else:
         crop_size = 224
         val_size = 256
...
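
For context on the `crop_size` / `val_size` pair kept in the `else` branch: they typically feed a torchvision resize-then-center-crop validation pipeline along these lines (a sketch; the actual transform chain is outside this hunk). `ToTensor` is deliberately absent in this style of pipeline because `fast_collate` above consumes PIL images directly:

```
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Sketch: how crop_size / val_size are typically consumed downstream.
val_dataset = datasets.ImageFolder(
    valdir,
    transforms.Compose([
        transforms.Resize(val_size),       # shorter side scaled to val_size
        transforms.CenterCrop(crop_size),  # final spatial size fed to the network
    ]))
```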
@@ -47,10 +47,12 @@ def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
     print(raw_output + "from " + cuda_dir + "/bin\n")
     if (bare_metal_major != torch_binary_major) or (bare_metal_minor != torch_binary_minor):
-        # TODO: make this a hard error?
-        print("\nWarning: Cuda extensions are being compiled with a version of Cuda that does "
-              "not match the version used to compile Pytorch binaries.\n")
-        print("Pytorch binaries were compiled with Cuda {}\n".format(torch.version.cuda))
+        raise RuntimeError("Cuda extensions are being compiled with a version of Cuda that does " +
+                           "not match the version used to compile Pytorch binaries. " +
+                           "Pytorch binaries were compiled with Cuda {}.\n".format(torch.version.cuda) +
+                           "In some cases, a minor-version mismatch will not cause later errors: " +
+                           "https://github.com/NVIDIA/apex/pull/323#discussion_r287021798. " +
+                           "You can try commenting out this check (at your own risk).")
     if "--cuda_ext" in sys.argv:
         from torch.utils.cpp_extension import CUDAExtension
...
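
The hunk turns a version-mismatch warning into a hard error. For context, a sketch of where the compared versions plausibly come from (the parsing is illustrative, not quoted from setup.py; `get_version_pairs` is a hypothetical helper, and a Cuda-enabled Pytorch build is assumed so that `torch.version.cuda` is not None):

```
import subprocess
import torch

def get_version_pairs(cuda_dir):
    # Hypothetical helper. Bare-metal toolkit version parsed from `nvcc -V`,
    # whose output ends like "... release 10.0, V10.0.130".
    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"],
                                         universal_newlines=True)
    release = raw_output.split("release ")[1].split(",")[0]      # e.g. "10.0"
    bare_metal_major, bare_metal_minor = release.split(".")[:2]

    # Version the installed Pytorch binaries were compiled against, e.g. "10.0".
    torch_binary_major, torch_binary_minor = torch.version.cuda.split(".")[:2]

    return (bare_metal_major, bare_metal_minor), (torch_binary_major, torch_binary_minor)
```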
import unittest

import functools as ft
import itertools as it

from apex import amp
from apex.amp import _amp_state
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import Parameter

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT


class MyModel(torch.nn.Module):
    def __init__(self, unique):
        super(MyModel, self).__init__()
        self.weight0 = Parameter(unique +
                                 torch.arange(2, device='cuda', dtype=torch.float32))
        self.weight1 = Parameter(1. + unique + torch.arange(2, device='cuda', dtype=torch.float16))

    @staticmethod
    def ops(input, weight0, weight1):
        return ((input*(weight0.float()))*(weight1.float())).sum()

    def forward(self, input):
        return self.ops(input, self.weight0, self.weight1)


# Abandon all hope, ye who enter here.
class TestAddParamGroup(unittest.TestCase):
    def setUp(self):
        self.x = torch.ones((2), device='cuda', dtype=torch.float32)
        common_init(self)

    def tearDown(self):
        pass

    def zero_grad(self, models, optimizer, how_to_zero):
        if how_to_zero == "none":
            for model in models:
                for param in model.parameters():
                    param.grad = None
        elif how_to_zero == "model":
            for model in models:
                model.zero_grad()
        elif how_to_zero == "optimizer":
            optimizer.zero_grad()

    def test_add_param_group(self):
        for opt_level in ("O0", "O1", "O2", "O3"):
            for zero_before_add in (True, False):
                for try_accumulation in (True, False):
                    model0 = MyModel(1)
                    model1 = MyModel(2)

                    optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                                momentum=0.125)

                    optimizer.zero_grad()
                    loss = model0(self.x)
                    loss.backward()
                    optimizer.step()

                    if zero_before_add:
                        optimizer.zero_grad()
                    optimizer.add_param_group({'params' : model1.parameters(), 'lr' : 0.5})
                    if not zero_before_add:
                        optimizer.zero_grad()

                    loss = model0(self.x) + model1(self.x)
                    loss.backward(retain_graph=try_accumulation)
                    if try_accumulation:
                        loss.backward()
                    optimizer.step()

                    # Once more to make sure the new params pick up momentums properly
                    optimizer.zero_grad()
                    loss = model0(self.x) + model1(self.x)
                    loss.backward(retain_graph=try_accumulation)
                    if try_accumulation:
                        loss.backward()
                    optimizer.step()

                    reference_params = [param.data.clone() for param in model0.parameters()] + \
                                       [param.data.clone() for param in model1.parameters()]

                    for how_to_zero in "none", "model", "optimizer":
                        model0 = MyModel(1)
                        model1 = MyModel(2)

                        optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                                    momentum=0.125)

                        _amp_state.allow_incoming_model_not_fp32 = True
                        [model0, model1], optimizer = amp.initialize([model0, model1],
                                                                     optimizer,
                                                                     opt_level=opt_level,
                                                                     verbosity=0,
                                                                     cast_model_type=False)
                        _amp_state.allow_incoming_model_not_fp32 = False

                        _amp_state.loss_scalers[0]._loss_scale = 4.0

                        self.zero_grad([model0, model1], optimizer, how_to_zero)
                        loss = model0(self.x)
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                        optimizer.step()

                        if zero_before_add:
                            self.zero_grad([model0, model1], optimizer, how_to_zero)
                        optimizer.add_param_group({'params' : model1.parameters(), 'lr' : 0.5})
                        if not zero_before_add:
                            self.zero_grad([model0, model1], optimizer, how_to_zero)

                        loss = model0(self.x) + model1(self.x)
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward(retain_graph=try_accumulation)
                        if try_accumulation:
                            with amp.scale_loss(loss, optimizer) as scaled_loss:
                                scaled_loss.backward()
                        optimizer.step()

                        # Once more to make sure the new params pick up momentums properly
                        self.zero_grad([model0, model1], optimizer, how_to_zero)
                        loss = model0(self.x) + model1(self.x)
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward(retain_graph=try_accumulation)
                        if try_accumulation:
                            with amp.scale_loss(loss, optimizer) as scaled_loss:
                                scaled_loss.backward()
                        optimizer.step()

                        final_params = [param.data.clone() for param in model0.parameters()] + \
                                       [param.data.clone() for param in model1.parameters()]

                        for reference, final in zip(reference_params, final_params):
                            self.assertTrue(torch.allclose(reference.to(final.dtype), final),
                                            "opt_level = {}, how_to_zero = {}, zero_before_add = {}".format(
                                                opt_level, how_to_zero, zero_before_add))


if __name__ == '__main__':
    unittest.main()
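
One subtlety this test leans on: `torch.optim.SGD` creates momentum buffers lazily, on the first `step()` in which a parameter has a gradient. Parameters added through `add_param_group` therefore start without momentum state, which is what the "Once more" passes above are checking. A minimal plain-Pytorch sketch of that API, independent of amp:

```
import torch

w0 = torch.ones(2, requires_grad=True)
w1 = torch.ones(2, requires_grad=True)
opt = torch.optim.SGD([{'params': [w0], 'lr': 0.25}], momentum=0.125)

w0.sum().backward()
opt.step()                 # w0's momentum buffer is created on this step

opt.add_param_group({'params': [w1], 'lr': 0.5})
opt.zero_grad()
(w0.sum() + w1.sum()).backward()
opt.step()                 # w1's momentum buffer is created here, one step later
```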
@@ -86,7 +86,6 @@ def fast_collate(batch):
     tensor = torch.zeros( (len(imgs), 3, h, w), dtype=torch.uint8 )
     for i, img in enumerate(imgs):
         nump_array = np.asarray(img, dtype=np.uint8)
-        tens = torch.from_numpy(nump_array)
         if(nump_array.ndim < 3):
             nump_array = np.expand_dims(nump_array, axis=-1)
         nump_array = np.rollaxis(nump_array, 2)
...
@@ -16,6 +16,7 @@ images=(
     "gitlab-master.nvidia.com:5005/dl/dgx/pytorch:19.03-py3-devel"
     "gitlab-master.nvidia.com:5005/dl/dgx/pytorch:master-py3-devel"
     "pytorch/pytorch:nightly-devel-cuda10.0-cudnn7"
+    "pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel"
     "pytorch/pytorch:1.0.1-cuda10.0-cudnn7-devel"
     "pytorch/pytorch:1.0-cuda10.0-cudnn7-devel"
     "pytorch/pytorch:nightly-devel-cuda9.2-cudnn7"
...