data.py 3.28 KB
Newer Older
Benjamin Thomas Graham's avatar
utils  
Benjamin Thomas Graham committed
1

Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
2
3
4
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
import torchnet
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
10
import sparseconvnet as scn
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
11
12
13
import pickle
import math
import random
14
import numpy as np
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
15
16
17
18
19
20
21
22
import os

# One-time dataset bootstrap: when the 'pickle/' directory is missing, fetch
# the two OLHWDB1.1 archives, unpack them into POT/ and run readPotFiles.py.
if not os.path.exists('pickle/'):
    print('Downloading and preprocessing data ...')
    # The steps depend on each other, so run them strictly in order and stop
    # at the first failure. Previously every exit status was ignored, which
    # let a failed download still create 'pickle/' and thereby suppress the
    # bootstrap on all later runs.
    for command in (
            'wget http://www.nlpr.ia.ac.cn/databases/download/feature_data/OLHWDB1.1trn_pot.zip',
            'wget http://www.nlpr.ia.ac.cn/databases/download/feature_data/OLHWDB1.1tst_pot.zip',
            'mkdir -p POT/ pickle/',
            'unzip OLHWDB1.1trn_pot.zip -d POT/',
            'unzip OLHWDB1.1tst_pot.zip -d POT/',
            'python readPotFiles.py',
    ):
        status = os.system(command)
        if status != 0:
            raise RuntimeError(
                'Data preparation step failed (status %d): %s\n'
                "Remove the 'pickle/' directory, if it was created, "
                'before retrying.' % (status, command))
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
27

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def interp(sample, x, y):
    """Linearly interpolate every column of ``y`` at the positions ``sample``.

    ``x`` holds the positions at which the rows of ``y`` are known. Each
    column of ``y`` is run through ``np.interp`` on its own and the results
    are placed side by side.

    Returns a float32 tensor with one row per entry of ``sample`` and one
    column per column of ``y``.
    """
    query = sample.numpy()
    knots = x.numpy()
    columns = []
    for col in range(y.shape[1]):
        columns.append(np.interp(query, knots, y[:, col].numpy()))
    # np.interp yields float64; the result is narrowed to float32 at the end.
    return torch.from_numpy(np.column_stack(columns)).float()
class Data(torch.utils.data.Dataset):
    """In-memory dataset of pen-stroke samples resampled along their length.

    The pickle file must contain a sequence of dict records, each with an
    'input' entry holding the strokes of one sample. Every record gains
    three entries at construction time:

    * 'coords'   - points sampled at unit arc-length steps along each stroke,
                   concatenated over all strokes of the sample;
    * 'features' - the unit direction of the stroke segment at each of those
                   points;
    * 'idx'      - the record's position in the dataset.

    Args:
        file: Path of the pickle file to load.
        scale: Extent the normalised stroke coordinates are stretched to.
    """

    def __init__(self, file, scale=63):
        print('Loading', file, 'and balancing points for scale', scale)
        super().__init__()
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced locally by the preprocessing step.
        with open(file, 'rb') as handle:
            self.data = pickle.load(handle)
        for idx, record in enumerate(self.data):
            strokes = []
            features = []
            for stroke in record['input']:
                # A stroke needs at least two points to form a segment.
                if len(stroke) <= 1:
                    continue
                # Presumably raw coordinates lie in [0, 255] (TODO confirm);
                # centre them on 0 and stretch to just under `scale`.
                stroke = stroke.float() / 255 - 0.5
                stroke *= scale - 1e-3
                delta = stroke[1:] - stroke[:-1]
                mag = (delta ** 2).sum(1) ** 0.5
                # Cumulative arc length at every point, starting from 0.
                zl = torch.cat([torch.zeros(1), mag.cumsum(0)])
                # Resample at arc lengths 0, 1, 2, ... up to the stroke length.
                sample = torch.arange(0, zl[-1])
                strokes.append(interp(sample, zl, stroke))
                # NOTE(review): a zero-length segment makes `mag` 0 and this
                # division produce NaN - confirm the preprocessing removes
                # repeated points.
                delta /= mag[:, None]
                n_seg = len(mag)
                # Repeat each direction at both ends of its segment so the
                # interpolation is piecewise constant along the stroke.
                delta = delta[[i // 2 for i in range(2 * n_seg)]]
                zl_ = zl[[i // 2 for i in range(1, 2 * n_seg + 1)]]
                features.append(interp(sample, zl_, delta))
            record['coords'] = torch.cat(strokes, 0)
            record['features'] = torch.cat(features, 0)
            record['idx'] = idx
        print('Loaded', len(self.data), 'points')

    def __getitem__(self, n):
        """Return the n-th record (a dict)."""
        return self.data[n]

    def __len__(self):
        """Return the number of records."""
        return len(self.data)
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
60

61
62
def MergeFn(spatial_size=63):
    """Build a ``collate_fn`` that packs samples into one sparse input batch.

    Args:
        spatial_size: Size of the target grid; half of it is added to every
            coordinate before the coordinates are truncated to integers.

    Returns:
        A function that takes a list of samples (mappings with 'coords',
        'features' and 'target' entries) and returns a dict with an 'input'
        ``scn.InputLayerInput`` and a 'target' ``LongTensor``.
    """
    center = spatial_size / 2

    def merge(tbl):
        locations = []
        features = []
        for idx, char in enumerate(tbl):
            coords = char['coords'] + center
            # Extra last column: the sample's index within the batch.
            batch_column = torch.LongTensor([idx]).expand([coords.size(0), 1])
            locations.append(torch.cat([coords.long(), batch_column], 1))
            f = char['features']
            # Append a constant 1 as an additional feature channel.
            features.append(torch.cat([f, torch.ones([f.size(0), 1])], 1))
        targets = [x['target'] for x in tbl]
        return {
            'input': scn.InputLayerInput(
                torch.cat(locations, 0), torch.cat(features, 0)),
            'target': torch.LongTensor(targets),
        }

    return merge
Benjamin Thomas Graham's avatar
Benjamin Thomas Graham committed
77
78


Benjamin Thomas Graham's avatar
tidy  
Benjamin Thomas Graham committed
79
def get_iterators(*args, batch_size=100, num_workers=10):
    """Build the training and validation data loaders.

    Args:
        *args: Accepted for call compatibility and ignored.
        batch_size: Number of samples per batch for both loaders.
        num_workers: Number of loader worker processes for each loader.

    Returns:
        Dict with a 'train' loader over 'pickle/train.pickle' and a 'val'
        loader over 'pickle/test.pickle'.
    """
    def make_loader(path):
        # NOTE(review): the validation loader shuffles as well - confirm
        # that this is intended.
        return torch.utils.data.DataLoader(
            Data(path), collate_fn=MergeFn(), batch_size=batch_size,
            shuffle=True, num_workers=num_workers)

    return {'train': make_loader('pickle/train.pickle'),
            'val': make_loader('pickle/test.pickle')}