comparison mlp_factory_approach.py @ 191:e816821c1e50

added early stopping to mlp.__call__
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 14 May 2008 20:04:44 -0400
parents aa7a3ecbcc90
children c5a7105fa40b
--- mlp_factory_approach.py (190:aa7a3ecbcc90)
+++ mlp_factory_approach.py (191:e816821c1e50)
-import copy
+import copy, sys
 import numpy
 
 import theano
-import theano.tensor as t
+from theano import tensor as t
 
-import dataset
-import nnet_ops
+from tlearn import dataset, nnet_ops, stopper
 
 def _randshape(*shape):
     return (numpy.random.rand(*shape) -0.5) * 0.001
 
-class NeuralNet(object):
+def _cache(d, key, valfn):
+    #valfn() is only evaluated if key isn't in dictionary d
+    if key not in d:
+        d[key] = valfn()
+    return d[key]
 
-    class _Model(object):
-        def __init__(self, nnet, params):
-            self.nnet = nnet
-            self.params = params
+class _Model(object):
+    def __init__(self, algo, params):
+        self.algo = algo
+        self.params = params
+        v = algo.v
+        self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params)
+        self._fn_cache = {}
 
-        def __copy__(self):
-            return _Model(self.nnet, [copy.copy(p) for p in params])
+    def __copy__(self):
+        return _Model(self.algo, [copy.copy(p) for p in params])
 
-        def update(self, trainset, stopper=None):
-            """Update this model from more training data."""
-            v = self.nnet.v
-            params = self.params
-            update_fn = self.nnet._fn([v.input, v.target] + v.params, [v.nll] + v.new_params)
-            if stopper is not None:
-                raise NotImplementedError()
-            else:
-                for i in xrange(100):
-                    for input, target in trainset.minibatches(['input', 'target'],
-                            minibatch_size=min(32, len(trainset))):
-                        dummy = update_fn(input, target[:,0], *params)
-                        if 0: print dummy[0] #the nll
+    def update(self, input_target):
+        """Update this model from more training data."""
+        params = self.params
+        #TODO: why should we have to unpack target like this?
+        for input, target in input_target:
+            self.update_fn(input, target[:,0], *params)
 
-        def __call__(self, testset,
-                output_fieldnames=['output_class'],
-                test_stats_collector=None,
-                copy_inputs=False,
-                put_stats_in_output_dataset=True,
-                output_attributes=[]):
-            """Apply this model (as a function) to new data"""
-            v = self.nnet.v
-            outputs = [getattr(self.nnet.v, name) for name in output_fieldnames]
-            if 'target' in testset:
-                fn = self.nnet._fn([v.input, v.target] + v.params, outputs)
-                return dataset.ApplyFunctionDataSet(testset,
-                        lambda input, target: fn(input, target[:,0], *self.params),
-                        output_fieldnames)
-            else:
-                fn = self.nnet._fn([v.input] + v.params, outputs)
-                return dataset.ApplyFunctionDataSet(testset,
-                        lambda input: fn(input, *self.params),
-                        output_fieldnames)
-    def _fn(self, inputs, outputs):
-        #it is possible for this function to implement function caching
-        #... but not necessarily desirable.
-        #- caching ruins the possibility of multi-threaded learning
-        #- caching demands more efficiency in the face of resizing inputs
-        #- caching makes it really hard to borrow references to function outputs
-        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)
+    def __call__(self, testset, fieldnames=['output_class']):
+        """Apply this model (as a function) to new data"""
+        #TODO: cache fn between calls
+        assert 'input' == testset.fieldNames()[0]
+        assert len(testset.fieldNames()) <= 2
+        v = self.algo.v
+        outputs = [getattr(v, name) for name in fieldnames]
+        inputs = [v.input] + ([v.target] if 'target' in testset else [])
+        inputs.extend(v.params)
+        theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)),
+                lambda: self.algo._fn(inputs, outputs))
+        lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
+        return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
 
-    def __init__(self, ninputs, nhid, nclass, lr, nepochs,
+class AutonameVars(object):
+    def __init__(self, dct):
+        for key, val in dct.items():
+            if type(key) is str and hasattr(val, 'name'):
+                val.name = key
+        self.__dict__.update(dct)
+
+class MultiLayerPerceptron(object):
+
+    def __init__(self, ninputs, nhid, nclass, lr,
             l2coef=0.0,
             linker='c&py',
-            hidden_layer=None):
-        class Vars:
-            def __init__(self, lr, l2coef):
+            hidden_layer=None,
+            early_stopper=None,
+            validation_portion=0.2,
+            V_extern=None):
+        class V_intern(AutonameVars):
+            def __init__(v_self, lr, l2coef, **kwargs):
                 lr = t.constant(lr)
                 l2coef = t.constant(l2coef)
-                input = t.matrix('input') # n_examples x n_inputs
-                target = t.ivector('target') # n_examples x 1
-                W2 = t.matrix('W2')
-                b2 = t.vector('b2')
+                input = t.matrix() # n_examples x n_inputs
+                target = t.ivector() # len: n_examples
+                W2, b2 = t.matrix(), t.vector()
 
                 if hidden_layer:
                     hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
                 else:
-                    W1 = t.matrix('W1')
-                    b1 = t.vector('b1')
+                    W1, b1 = t.matrix(), t.vector()
                     hid = t.tanh(b1 + t.dot(input, W1))
                     hid_params = [W1, b1]
                     hid_regularization = l2coef * t.sum(W1*W1)
                     hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
 
[... 4 unchanged lines omitted in the comparison view ...]
                 output_class = t.argmax(activations,1)
                 loss_01 = t.neq(output_class, target)
                 g_params = t.grad(nll + regularization, params)
                 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
                 self.__dict__.update(locals()); del self.self
+                AutonameVars.__init__(v_self, locals())
         self.nhid = nhid
         self.nclass = nclass
-        self.nepochs = nepochs
-        self.v = Vars(lr, l2coef)
-        self.params = None
+        self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals())
         self.linker = linker
+        self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1)
+        self.validation_portion = validation_portion
+
+    def _fn(self, inputs, outputs):
+        # Caching here would hamper multi-threaded apps
+        # prefer caching in _Model.__call__
+        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)
 
     def __call__(self, trainset=None, iparams=None):
+        """Allocate and optionally train a model"""
         if iparams is None:
             iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
                     + self.v.hid_ivals()
-        rval = NeuralNet._Model(self, iparams)
+        rval = _Model(self, iparams)
         if trainset:
-            rval.update(trainset)
+            if len(trainset) == sys.maxint:
+                raise NotImplementedError('Learning from infinite streams is not supported')
+            nval = int(self.validation_portion * len(trainset))
+            nmin = len(trainset) - nval
+            assert nmin >= 0
+            minset = trainset[:nmin] #real training set for minimizing loss
+            valset = trainset[nmin:] #validation set for early stopping
+            best = rval
+            for stp in self.early_stopper():
+                rval.update(
+                        trainset.minibatches(['input', 'target'], minibatch_size=min(32,
+                            len(trainset))))
+                if stp.set_score:
+                    stp.score = rval(valset, ['loss_01'])
+                    if (stp.score < stp.best_score):
+                        best = copy.copy(rval)
+            rval = best
         return rval
 
 
+import unittest
+
+class TestMLP(unittest.TestCase):
+    def test0(self):
+
+        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                          [0, 1, 1],
+                                                          [1, 0, 1],
+                                                          [1, 1, 1]]),
+                                             {'input':slice(2),'target':2})
+        training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                          [0, 1, 1],
+                                                          [1, 0, 0],
+                                                          [1, 1, 1]]),
+                                             {'input':slice(2),'target':2})
+        test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                      [0, 1, 1],
+                                                      [1, 0, 0],
+                                                      [1, 1, 1]]),
+                                         {'input':slice(2)})
+
+        learn_algo = MultiLayerPerceptron(2, 10, 2, .1
+                , linker='c&py'
+                , early_stopper = lambda:stopper.NStages(100,1))
+
+        model1 = learn_algo(training_set1)
+
+        model2 = learn_algo(training_set2)
+
+        n_match = 0
+        for o1, o2 in zip(model1(test_data), model2(test_data)):
+            #print o1
+            #print o2
+            n_match += (o1 == o2)
+
+        assert n_match == (numpy.sum(training_set1.fields()['target'] ==
+                training_set2.fields()['target']))
+
 if __name__ == '__main__':
-    training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                      [0, 1, 1],
-                                                      [1, 0, 1],
-                                                      [1, 1, 1]]),
-                                         {'input':slice(2),'target':2})
-    training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                      [0, 1, 1],
-                                                      [1, 0, 0],
-                                                      [1, 1, 1]]),
-                                         {'input':slice(2),'target':2})
-    test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                  [0, 1, 1],
-                                                  [1, 0, 0],
-                                                  [1, 1, 1]]),
-                                     {'input':slice(2)})
+    unittest.main()
 
-    learn_algo = NeuralNet(2, 10, 3, .1, 1000)
-
-    model1 = learn_algo(training_set1)
-
-    model2 = learn_algo(training_set2)
-
-    n_match = 0
-    for o1, o2 in zip(model1(test_data), model2(test_data)):
-        n_match += (o1 == o2)
-
-    print n_match, numpy.sum(training_set1.fields()['target'] ==
-            training_set2.fields()['target'])
-
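
The early-stopping loop added to MultiLayerPerceptron.__call__ drives training through a stopper object obtained from tlearn.stopper, a module that is not part of this changeset. The loop only assumes an iterable of step objects exposing set_score, score and best_score. A minimal sketch of an object satisfying that protocol is given below; the class name, constructor arguments and update policy are illustrative assumptions, not the actual tlearn.stopper.NStages.

    # Hypothetical sketch of the stopper protocol used by the early-stopping
    # loop above; tlearn.stopper.NStages itself is not shown in this changeset.
    class SimpleNStages(object):
        """Run `hard_limit` stages, asking the caller for a validation score
        every `v_int` stages and remembering the best score seen so far."""
        def __init__(self, hard_limit, v_int):
            self.hard_limit = hard_limit
            self.v_int = v_int
            self.best_score = float('inf')
            self.score = None
            self.set_score = False

        def __iter__(self):
            for i in xrange(self.hard_limit):
                # ask the caller to fill in self.score on every v_int-th stage
                self.set_score = ((i + 1) % self.v_int == 0)
                yield self
                # record the new best only after the caller has compared
                # against the previous best_score
                if self.set_score and self.score is not None and self.score < self.best_score:
                    self.best_score = self.score

With such an object, passing early_stopper=lambda: SimpleNStages(100, 1) to MultiLayerPerceptron would request a validation score after every stage, which is how the new TestMLP test constructs its learner with stopper.NStages(100,1).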