data.py 3.29 KB
Newer Older
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
1
2
3
4
5
6
7
8
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import torchnet
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
9
import sparseconvnet as scn
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
10
11
12
import pickle
import math
import random
13
import numpy as np
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
14
15
16
17
18
19
20
21
22
23
24
import os

# One-time dataset bootstrap: when the 'pickle/' directory is missing, fetch
# the OLHWDB1.1 train/test archives, unpack them into POT/ and run the
# converter script. Exit statuses of the shell commands are not inspected,
# so a failed step does not stop the ones after it.
if not os.path.exists('pickle/'):
    print('Downloading and preprocessing data ...')
    for _command in (
            'wget http://www.nlpr.ia.ac.cn/databases/download/feature_data/OLHWDB1.1trn_pot.zip',
            'wget http://www.nlpr.ia.ac.cn/databases/download/feature_data/OLHWDB1.1tst_pot.zip',
            'mkdir -p t7/train/ t7/test/ POT/ pickle/',
            'unzip OLHWDB1.1trn_pot.zip -d POT/',
            'unzip OLHWDB1.1tst_pot.zip -d POT/',
            'python readPotFiles.py',
    ):
        # Commands run strictly in this order, each in its own shell.
        os.system(_command)
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
26

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def interp(sample, x, y):
    """Linearly interpolate each column of ``y`` at the positions ``sample``.

    ``x`` holds the knot positions and ``y[:, i]`` the values at those knots
    for column ``i``. All three arguments are tensors (converted with
    ``.numpy()``). Returns a float32 tensor with one row per entry of
    ``sample`` and one column per column of ``y``.
    """
    query = sample.numpy()
    knots = x.numpy()
    columns = []
    for i in range(y.shape[1]):
        values = np.interp(query, knots, y[:, i].numpy())
        # Keep each result as a column vector so hstack places them side by side.
        columns.append(values[:, None])
    stacked = np.hstack(columns)
    return torch.from_numpy(stacked).float()
class Data(torch.utils.data.Dataset):
    """Dataset of pickled handwriting samples, resampled along each stroke.

    The pickle file must contain a sequence of dicts. Each dict has an
    ``'input'`` entry holding an iterable of stroke tensors; strokes with
    fewer than two points are ignored. On load every dict gains three keys:

    * ``'coords'``   - points sampled at unit arc-length steps along every
      stroke, all strokes concatenated;
    * ``'features'`` - the direction of the segment each sampled point lies
      on, aligned row-for-row with ``'coords'``;
    * ``'idx'``      - position of the sample within the dataset.

    Args:
        file: path of the pickle file to load.
        scale: strokes are mapped from ``value / 255 - 0.5`` to roughly
            ``[-scale / 2, scale / 2]`` before resampling.

    Raises:
        RuntimeError/ValueError from ``torch.cat`` when a sample has no
        stroke with at least two points (unchanged from the original code).
    """

    def __init__(self,file,scale=63):
        print('Loading', file, 'and balancing points for scale', scale)
        torch.utils.data.Dataset.__init__(self)
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at files produced by the project's own preprocessing.
        with open(file, 'rb') as handle:
            self.data = pickle.load(handle)
        for idx, sample in enumerate(self.data):
            strokes = []
            features = []
            for stroke in sample['input']:
                if len(stroke) > 1:
                    coords, directions = self._resample(stroke, scale)
                    strokes.append(coords)
                    features.append(directions)
            sample['coords'] = torch.cat(strokes, 0)
            sample['features'] = torch.cat(features, 0)
            sample['idx'] = idx
        print('Loaded', len(self.data), 'points')

    @staticmethod
    def _resample(stroke, scale):
        """Return (coords, directions) sampled at unit arc-length steps."""
        # presumably stroke holds byte-valued (x, y) rows - TODO confirm
        # against readPotFiles.py
        stroke = stroke.float() / 255 - 0.5
        # The small 1e-3 shrink keeps the scaled extent strictly below scale.
        stroke *= scale - 1e-3
        delta = stroke[1:] - stroke[:-1]
        mag = (delta ** 2).sum(1) ** 0.5
        # Cumulative arc length at every original point, starting at 0.
        arc = torch.cat([torch.zeros(1), mag.cumsum(0)])
        positions = torch.arange(0, arc[-1])
        coords = interp(positions, arc, stroke)
        # Repeated consecutive points give zero-length segments; dividing by
        # their length would put NaNs into the features, so divide by 1
        # there (the delta is already zero).
        safe_mag = torch.where(mag > 0, mag, torch.ones_like(mag))
        delta = delta / safe_mag[:, None]
        # Duplicate each segment's direction at both of its end knots so the
        # interpolated direction is piecewise constant along the stroke.
        segments = len(mag)
        value_index = [i // 2 for i in range(2 * segments)]
        knot_index = [i // 2 for i in range(1, 2 * segments + 1)]
        directions = interp(positions, arc[knot_index], delta[value_index])
        return coords, directions

    def __getitem__(self,n):
        return self.data[n]

    def __len__(self):
        return len(self.data)
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
59

60
61
def MergeFn(spatial_size=63):
    """Build a collate function that merges samples into one sparse batch.

    Args:
        spatial_size: side length of the input grid; half of it is added to
            every coordinate before truncation to integers.

    Returns:
        A function ``merge(tbl)`` taking a list of sample dicts (each with
        ``'coords'``, ``'features'`` and ``'target'``) and returning a dict
        with:

        * ``'input'``  - ``scn.InputLayerInput`` built from the concatenated
          integer locations (with the sample's position in the batch as the
          last column) and the concatenated features (with a column of ones
          appended);
        * ``'target'`` - ``LongTensor`` of the samples' targets.
    """
    center = spatial_size/2

    def merge(tbl):
        targets = []
        locations = []
        features = []
        for idx, char in enumerate(tbl):
            targets.append(char['target'])
            # Shift into grid coordinates and truncate to integer cells.
            coords = (char['coords'] + center).long()
            # Last column carries the sample's index within the batch.
            batch_column = torch.LongTensor([idx]).expand([coords.size(0), 1])
            locations.append(torch.cat([coords, batch_column], 1))
            f = char['features']
            # Extra constant-one feature channel for every point.
            features.append(torch.cat([f, torch.ones([f.size(0), 1])], 1))
        return {'input': scn.InputLayerInput(torch.cat(locations,0), torch.cat(features,0)), 'target': torch.LongTensor(targets)}
    return merge
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
76
77


Benjamin Thomas Graham's avatar
tidy  
Benjamin Thomas Graham committed
78
def get_iterators(*args):
    """Return the ``'train'`` and ``'val'`` data loaders.

    Positional arguments are accepted and ignored. Both loaders read from
    the ``pickle/`` directory, use the default ``MergeFn()`` collate
    function, batches of 100, shuffling, and 10 worker processes.
    """
    def make_loader(path):
        return torch.utils.data.DataLoader(
            Data(path),
            collate_fn=MergeFn(),
            batch_size=100,
            shuffle=True,
            num_workers=10)

    # The train set is loaded before the validation set.
    train_loader = make_loader('pickle/train.pickle')
    val_loader = make_loader('pickle/test.pickle')
    return {'train': train_loader, 'val': val_loader}