# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import math
import os
import pickle
import random

import torch
import torchnet
import sparseconvnet as scn
import numpy as np

# One-time bootstrap: when the 'pickle/' directory is missing, run the
# download script and then import the project's `process` module.
if not os.path.exists('pickle/'):
    print('Downloading and preprocessing data ...')
    status = os.system('bash process.sh')
    if status != 0:
        # Surface a failed download instead of ignoring it silently; keep
        # going so the original best-effort behaviour is preserved.
        print('Warning: process.sh exited with status %d' % status)
    import process


def interp(sample,x,y):
    """Linearly interpolate every column of ``y`` at the positions ``sample``.

    ``x`` gives the knot positions and ``y[:, i]`` the values at those knots
    for column ``i``; each column is interpolated independently with
    ``np.interp``.  The result is a float32 tensor with one row per entry of
    ``sample`` and one column per column of ``y``.
    """
    query = sample.numpy()
    knots = x.numpy()
    columns = []
    for channel in range(y.shape[1]):
        values = np.interp(query, knots, y[:, channel].numpy())
        columns.append(values[:, None])  # column vector, ready for hstack
    return torch.from_numpy(np.hstack(columns)).float()
class Data(torch.utils.data.Dataset):
    """Pickled pen-stroke samples, resampled at unit steps of arc length.

    While loading, every sample gains two tensors:

    * ``'coords'``   -- points interpolated along each of its strokes;
    * ``'features'`` -- the direction of the stroke segment at each of those
      points (segment vector divided by its length);

    and its ``'target'`` is decremented by one.  Afterwards every entry of
    the dataset carries an ``'idx'`` equal to its position in the dataset.
    """

    def __init__(self,file,scale=63,repeats=1):
        """Load ``file`` and preprocess every sample it contains.

        Args:
            file: path of the pickle to load.  It is expected to hold a list
                of dict-like samples with an ``'input'`` entry (an iterable
                of strokes) and a ``'target'`` entry.
            scale: raw stroke values ``v`` are mapped to
                ``(v / 255 - 0.5) * (scale - 1e-3)``.
            repeats: total number of times each sample appears in the
                dataset; values of 1 or less leave the dataset unreplicated.
        """
        torch.utils.data.Dataset.__init__(self)
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        with open(file, 'rb') as f:
            self.data = pickle.load(f)
        for sample in self.data:
            strokes=[]
            features=[]
            for stroke in sample['input']:
                # A stroke with fewer than two points has no segment to follow.
                if len(stroke)>1:
                    # assumes each stroke is an (n_points, 2) tensor of
                    # values in 0..255 -- TODO confirm against process.py
                    stroke=stroke.float()/255-0.5
                    stroke*=scale-1e-3
                    delta=stroke[1:]-stroke[:-1]
                    mag=(delta**2).sum(1)**0.5
                    arc=mag.cumsum(0)
                    # Arc length at every stroke point, starting from 0.
                    zl=torch.cat([torch.zeros(1),arc])
                    positions=torch.arange(0,zl[-1])
                    strokes.append(interp(positions,zl,stroke))
                    # NOTE(review): a zero-length segment (repeated point)
                    # makes this 0/0 and yields NaN directions.
                    delta/=mag[:,None]
                    # Each segment direction is listed at both ends of its
                    # arc-length interval, so interpolating between the two
                    # ends of one segment returns that segment's direction.
                    delta=delta[[i//2 for i in range(2*len(arc))]]
                    zl_=zl[[i//2 for i in range(1,2*len(arc)+1)]]
                    features.append(interp(positions,zl_,delta))
            sample['coords'] = torch.cat(strokes,0)
            sample['features'] = torch.cat(features,0)
            sample['target']-=1
        if repeats>1:
            print('Replicating dataset: 1 epoch = %d iterations of the dataset; %d x %d = %d training samples'%(repeats, repeats, len(self.data), repeats * len(self.data)))
        self.data = self._replicate(self.data, repeats)

    @staticmethod
    def _replicate(samples, repeats):
        """Return ``samples`` followed by ``repeats - 1`` copies of each one.

        Copies are shallow, so tensors are shared, but every entry is its own
        dict.  That lets ``'idx'`` be the entry's real position; appending the
        same dict object repeatedly would make every replica overwrite the
        ``'idx'`` of the others.
        """
        replicated = list(samples)
        for sample in samples:
            replicated.extend(dict(sample) for _ in range(repeats - 1))
        for i, sample in enumerate(replicated):
            sample['idx'] = i
        return replicated

    def __getitem__(self,n):
        """Return the preprocessed sample stored at position ``n``."""
        return self.data[n]

    def __len__(self):
        """Return the number of entries, replicas included."""
        return len(self.data)
def TrainMergeFn(spatial_size=95, jitter=8):
    """Build the collate function used for training batches.

    The returned ``merge(tbl)`` takes a list of dataset samples.  For each
    sample it draws a random 2x2 matrix -- one of two shears or a rotation,
    with parameter ``alpha`` in [-0.2, 0.2) -- and applies it to the sample's
    ``'coords'`` and ``'features'``.  The coordinates are then shifted by one
    random offset per sample, drawn uniformly from
    ``[center - jitter, center + jitter)`` on each axis, where
    ``center = spatial_size / 2``.

    Args:
        spatial_size: size used to compute the centring offset.
        jitter: half-width of the random translation range.

    Returns:
        A function mapping a list of samples to a dict with ``'input'`` (an
        ``scn.InputLayerInput`` built from the concatenated locations and
        features) and ``'target'`` (a ``LongTensor`` of the sample targets).
    """
    center = spatial_size/2
    def merge(tbl):
        targets=[x['target'] for x in tbl]
        locations=[]
        features=[]
        for idx,char in enumerate(tbl):
            # Pick the augmentation: r == 1 / r == 2 are the two shears,
            # r == 0 is a rotation by alpha radians.
            m = torch.eye(2)
            r = torch.randint(0,3,[1]).int().item()
            alpha = torch.rand(1).item()*0.4-0.2
            if r == 1:
                m[0][1] = alpha
            elif r == 2:
                m[1][0] = alpha
            else:
                m = torch.mm(m, torch.FloatTensor(
                    [[math.cos(alpha), math.sin(alpha)],
                     [-math.sin(alpha), math.cos(alpha)]]))
            coords=char['coords']
            coords = torch.mm(coords, m) + torch.FloatTensor(1, 2).uniform_(center-jitter, center+jitter)
            # Truncate to integer locations and append the position of the
            # sample within the batch as a third column.
            coords = torch.cat([coords.long(),torch.LongTensor([idx]).expand([coords.size(0),1])],1)
            locations.append(coords)
            f=char['features']
            f=torch.mm(f, m)
            # Shears change vector length, so rescale each row to unit norm.
            f /= (f**2).sum(1,keepdim=True)**0.5
            # Append a constant 1 as a third feature channel.
            f = torch.cat([f,torch.ones([f.size(0),1])],1)
            features.append(f)
        return {'input': scn.InputLayerInput(torch.cat(locations,0), torch.cat(features,0)), 'target': torch.LongTensor(targets)}
    return merge
def TestMergeFn(spatial_size=95):
    """Build the collate function used for evaluation batches.

    The returned ``merge(tbl)`` takes a list of dataset samples and applies
    no random augmentation: each sample's ``'coords'`` are shifted by
    ``center = spatial_size / 2`` and its ``'features'`` are used as stored.

    Args:
        spatial_size: size used to compute the centring offset.

    Returns:
        A function mapping a list of samples to a dict with ``'input'`` (an
        ``scn.InputLayerInput`` built from the concatenated locations and
        features) and ``'target'`` (a ``LongTensor`` of the sample targets).
    """
    center = spatial_size/2
    def merge(tbl):
        targets=[x['target'] for x in tbl]
        locations=[]
        features=[]
        for idx,char in enumerate(tbl):
            coords=char['coords']+center
            # Truncate to integer locations and append the position of the
            # sample within the batch as a third column.
            coords = torch.cat([coords.long(),torch.LongTensor([idx]).expand([coords.size(0),1])],1)
            locations.append(coords)
            f=char['features']
            # Append a constant 1 as a third feature channel.
            f = torch.cat([f,torch.ones([f.size(0),1])],1)
            features.append(f)
        return {'input': scn.InputLayerInput(torch.cat(locations,0), torch.cat(features,0)), 'target': torch.LongTensor(targets)}
    return merge


def get_iterators(*args, num_workers=10):
    """Create the training and validation data loaders.

    Args:
        *args: accepted and ignored.
        num_workers: number of worker processes given to each ``DataLoader``.

    Returns:
        A dict with two ``torch.utils.data.DataLoader`` objects:
        ``'train'`` reads 'pickle/train.pickle' with ``repeats=10`` in
        batches of 108, collated by ``TrainMergeFn()``; ``'val'`` reads
        'pickle/test.pickle' with ``repeats=1`` in batches of 183, collated
        by ``TestMergeFn()``.  Both loaders shuffle.
    """
    return {'train': torch.utils.data.DataLoader(Data('pickle/train.pickle',repeats=10), collate_fn=TrainMergeFn(), batch_size=108, shuffle=True, num_workers=num_workers),
            'val': torch.utils.data.DataLoader(Data('pickle/test.pickle',repeats=1), collate_fn=TestMergeFn(), batch_size=183, shuffle=True, num_workers=num_workers)}