"vscode:/vscode.git/clone" did not exist on "b8cd09f27aaee18f90424f8baf74e936269428a0"
save_load.py 1.84 KB
Newer Older
Michael Carilli's avatar
Michael Carilli committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import torch
from torch.autograd import Variable
from apex.fp16_utils import FP16_Optimizer

# Let cuDNN benchmark and pick the fastest algorithms for these fixed input sizes.
torch.backends.cudnn.benchmark = True

N, D_in, D_out = 64, 1024, 16

# Random half-precision input and target tensors on the GPU.
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()

# The model itself is also cast to fp16.
model = torch.nn.Linear(D_in, D_out).cuda().half()

optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
### Construct FP16_Optimizer with static loss scaling...
optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.0)
### ...or dynamic loss scaling
# optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
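# Added note: with dynamic loss scaling, FP16_Optimizer starts from a large scale,
# skips the optimizer step and lowers the scale whenever an inf/nan gradient
# overflow is detected, and raises the scale again after a stretch of
# overflow-free iterations, so the scale needs no manual tuning.
# A minimal way to inspect the current scale (assumes the FP16_Optimizer.loss_scale
# property; check your apex version):
# print("current loss scale:", optimizer.loss_scale)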

loss_fn = torch.nn.MSELoss()

# The checkpointing shown here is identical to what you'd use without FP16_Optimizer.
#
# We save/load checkpoints within local scopes, so the "checkpoint" object
# does not persist.  This helps avoid dangling references to intermediate deserialized
# data, and is good practice for PyTorch in general, not just with FP16_Optimizer.
# A sketch of resuming in a fresh process follows the two helpers below.
def save_checkpoint():
    checkpoint = {}
    checkpoint['model'] = model.state_dict()
    checkpoint['optimizer'] = optimizer.state_dict()
    torch.save(checkpoint, 'saved.pth')

def load_checkpoint():
    checkpoint = torch.load(
        'saved.pth',
        map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
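
# A hedged sketch (not part of the original script): when resuming training in a
# fresh process, rebuild the model, the inner optimizer, and the FP16_Optimizer
# wrapper exactly as above *before* calling load_state_dict, so the restored
# state lands on objects with matching structure. The helper name and default
# path below are illustrative only.
def resume_from_checkpoint(path='saved.pth'):
    new_model = torch.nn.Linear(D_in, D_out).cuda().half()
    new_optimizer = torch.optim.SGD(new_model.parameters(), lr=1e-3, momentum=0.9)
    new_optimizer = FP16_Optimizer(new_optimizer, static_loss_scale=128.0)
    checkpoint = torch.load(
        path,
        map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
    new_model.load_state_dict(checkpoint['model'])
    new_optimizer.load_state_dict(checkpoint['optimizer'])
    return new_model, new_optimizer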

for t in range(100):
    optimizer.zero_grad()
    y_pred = model(x)
    # Compute the loss in fp32 for numerical stability.
    loss = loss_fn(y_pred.float(), y.float())
    # optimizer.backward scales the loss before backprop so small fp16 gradients
    # don't underflow to zero; optimizer.step unscales and applies the update.
    optimizer.backward(loss) ### formerly loss.backward()
    optimizer.step()

save_checkpoint()

load_checkpoint()

# Continue training with the restored model and optimizer state.
for t in range(100):
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred.float(), y.float())
    optimizer.backward(loss) ### formerly loss.backward()
    optimizer.step()

print("final loss = ", loss)