pylearn: comparison mlp_factory_approach.py @ 191:e816821c1e50
added early stopping to mlp.__call__
author   | James Bergstra <bergstrj@iro.umontreal.ca>
date     | Wed, 14 May 2008 20:04:44 -0400
parents  | aa7a3ecbcc90
children | c5a7105fa40b
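
The change described by this revision is a validation-based early-stopping loop in MultiLayerPerceptron.__call__: the training set is split into a minimization portion and a validation portion, the model is updated for as many stages as the stopper allows, and the parameter copy that scored best on the validation set is returned. Below is a minimal, framework-free sketch of that loop; train_with_early_stopping, model.update, and model.score are hypothetical stand-ins for the stopper-driven loop, _Model.update, and the 'loss_01' output in the diff, and are not part of the pylearn code.

    import copy

    def train_with_early_stopping(model, minset, valset, n_stages):
        # Keep a copy of the best model seen so far, judged on the
        # validation set; n_stages plays the role of stopper.NStages.
        best, best_score = copy.copy(model), float('inf')
        for stage in range(n_stages):
            model.update(minset)           # one minimization pass
            score = model.score(valset)    # e.g. mean 0/1 loss on the validation set
            if score < best_score:
                best_score = score
                best = copy.copy(model)
        return best
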
190:aa7a3ecbcc90 | 191:e816821c1e50
1 import copy | 1 import copy, sys |
2 import numpy | 2 import numpy |
3 | 3 |
4 import theano | 4 import theano |
5 import theano.tensor as t | 5 from theano import tensor as t |
6 | 6 |
7 import dataset | 7 from tlearn import dataset, nnet_ops, stopper |
8 import nnet_ops | |
9 | 8 |
10 def _randshape(*shape): | 9 def _randshape(*shape): |
11 return (numpy.random.rand(*shape) -0.5) * 0.001 | 10 return (numpy.random.rand(*shape) -0.5) * 0.001 |
12 | 11 |
13 class NeuralNet(object): | 12 def _cache(d, key, valfn): |
13 #valfn() is only evaluated if key isn't in dictionary d | |
14 if key not in d: | |
15 d[key] = valfn() | |
16 return d[key] | |
14 | 17 |
15 class _Model(object): | 18 class _Model(object): |
16 def __init__(self, nnet, params): | 19 def __init__(self, algo, params): |
17 self.nnet = nnet | 20 self.algo = algo |
18 self.params = params | 21 self.params = params |
22 v = algo.v | |
23 self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) | |
24 self._fn_cache = {} | |
19 | 25 |
20 def __copy__(self): | 26 def __copy__(self): |
21 return _Model(self.nnet, [copy.copy(p) for p in params]) | 27 return _Model(self.algo, [copy.copy(p) for p in params]) |
22 | 28 |
23 def update(self, trainset, stopper=None): | 29 def update(self, input_target): |
24 """Update this model from more training data.""" | 30 """Update this model from more training data.""" |
25 v = self.nnet.v | 31 params = self.params |
26 params = self.params | 32 #TODO: why should we have to unpack target like this? |
27 update_fn = self.nnet._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) | 33 for input, target in input_target: |
28 if stopper is not None: | 34 self.update_fn(input, target[:,0], *params) |
29 raise NotImplementedError() | |
30 else: | |
31 for i in xrange(100): | |
32 for input, target in trainset.minibatches(['input', 'target'], | |
33 minibatch_size=min(32, len(trainset))): | |
34 dummy = update_fn(input, target[:,0], *params) | |
35 if 0: print dummy[0] #the nll | |
36 | 35 |
37 def __call__(self, testset, | 36 def __call__(self, testset, fieldnames=['output_class']): |
38 output_fieldnames=['output_class'], | 37 """Apply this model (as a function) to new data""" |
39 test_stats_collector=None, | 38 #TODO: cache fn between calls |
40 copy_inputs=False, | 39 assert 'input' == testset.fieldNames()[0] |
41 put_stats_in_output_dataset=True, | 40 assert len(testset.fieldNames()) <= 2 |
42 output_attributes=[]): | 41 v = self.algo.v |
43 """Apply this model (as a function) to new data""" | 42 outputs = [getattr(v, name) for name in fieldnames] |
44 v = self.nnet.v | 43 inputs = [v.input] + ([v.target] if 'target' in testset else []) |
45 outputs = [getattr(self.nnet.v, name) for name in output_fieldnames] | 44 inputs.extend(v.params) |
46 if 'target' in testset: | 45 theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), |
47 fn = self.nnet._fn([v.input, v.target] + v.params, outputs) | 46 lambda: self.algo._fn(inputs, outputs)) |
48 return dataset.ApplyFunctionDataSet(testset, | 47 lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) |
49 lambda input, target: fn(input, target[:,0], *self.params), | 48 return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) |
50 output_fieldnames) | |
51 else: | |
52 fn = self.nnet._fn([v.input] + v.params, outputs) | |
53 return dataset.ApplyFunctionDataSet(testset, | |
54 lambda input: fn(input, *self.params), | |
55 output_fieldnames) | |
56 def _fn(self, inputs, outputs): | |
57 #it is possible for this function to implement function caching | |
58 #... but not necessarily desirable. | |
59 #- caching ruins the possibility of multi-threaded learning | |
60 #- caching demands more efficiency in the face of resizing inputs | |
61 #- caching makes it really hard to borrow references to function outputs | |
62 return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) | |
63 | 49 |
64 def __init__(self, ninputs, nhid, nclass, lr, nepochs, | 50 class AutonameVars(object): |
51 def __init__(self, dct): | |
52 for key, val in dct.items(): | |
53 if type(key) is str and hasattr(val, 'name'): | |
54 val.name = key | |
55 self.__dict__.update(dct) | |
56 | |
57 class MultiLayerPerceptron(object): | |
58 | |
59 def __init__(self, ninputs, nhid, nclass, lr, | |
65 l2coef=0.0, | 60 l2coef=0.0, |
66 linker='c&py', | 61 linker='c&py', |
67 hidden_layer=None): | 62 hidden_layer=None, |
68 class Vars: | 63 early_stopper=None, |
69 def __init__(self, lr, l2coef): | 64 validation_portion=0.2, |
65 V_extern=None): | |
66 class V_intern(AutonameVars): | |
67 def __init__(v_self, lr, l2coef, **kwargs): | |
70 lr = t.constant(lr) | 68 lr = t.constant(lr) |
71 l2coef = t.constant(l2coef) | 69 l2coef = t.constant(l2coef) |
72 input = t.matrix('input') # n_examples x n_inputs | 70 input = t.matrix() # n_examples x n_inputs |
73 target = t.ivector('target') # n_examples x 1 | 71 target = t.ivector() # len: n_examples |
74 W2 = t.matrix('W2') | 72 W2, b2 = t.matrix(), t.vector() |
75 b2 = t.vector('b2') | |
76 | 73 |
77 if hidden_layer: | 74 if hidden_layer: |
78 hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) | 75 hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) |
79 else: | 76 else: |
80 W1 = t.matrix('W1') | 77 W1, b1 = t.matrix(), t.vector() |
81 b1 = t.vector('b1') | |
82 hid = t.tanh(b1 + t.dot(input, W1)) | 78 hid = t.tanh(b1 + t.dot(input, W1)) |
83 hid_params = [W1, b1] | 79 hid_params = [W1, b1] |
84 hid_regularization = l2coef * t.sum(W1*W1) | 80 hid_regularization = l2coef * t.sum(W1*W1) |
85 hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] | 81 hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] |
86 | 82 |
91 output_class = t.argmax(activations,1) | 87 output_class = t.argmax(activations,1) |
92 loss_01 = t.neq(output_class, target) | 88 loss_01 = t.neq(output_class, target) |
93 g_params = t.grad(nll + regularization, params) | 89 g_params = t.grad(nll + regularization, params) |
94 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] | 90 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] |
95 self.__dict__.update(locals()); del self.self | 91 self.__dict__.update(locals()); del self.self |
92 AutonameVars.__init__(v_self, locals()) | |
96 self.nhid = nhid | 93 self.nhid = nhid |
97 self.nclass = nclass | 94 self.nclass = nclass |
98 self.nepochs = nepochs | 95 self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) |
99 self.v = Vars(lr, l2coef) | |
100 self.params = None | |
101 self.linker = linker | 96 self.linker = linker |
97 self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) | |
98 self.validation_portion = validation_portion | |
99 | |
100 def _fn(self, inputs, outputs): | |
101 # Caching here would hamper multi-threaded apps | |
102 # prefer caching in _Model.__call__ | |
103 return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) | |
102 | 104 |
103 def __call__(self, trainset=None, iparams=None): | 105 def __call__(self, trainset=None, iparams=None): |
106 """Allocate and optionally train a model""" | |
104 if iparams is None: | 107 if iparams is None: |
105 iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ | 108 iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ |
106 + self.v.hid_ivals() | 109 + self.v.hid_ivals() |
107 rval = NeuralNet._Model(self, iparams) | 110 rval = _Model(self, iparams) |
108 if trainset: | 111 if trainset: |
109 rval.update(trainset) | 112 if len(trainset) == sys.maxint: |
113 raise NotImplementedError('Learning from infinite streams is not supported') | |
114 nval = int(self.validation_portion * len(trainset)) | |
115 nmin = len(trainset) - nval | |
116 assert nmin >= 0 | |
117 minset = trainset[:nmin] #real training set for minimizing loss | |
118 valset = trainset[nmin:] #validation set for early stopping | |
119 best = rval | |
120 for stp in self.early_stopper(): | |
121 rval.update( | |
122 trainset.minibatches(['input', 'target'], minibatch_size=min(32, | |
123 len(trainset)))) | |
124 if stp.set_score: | |
125 stp.score = rval(valset, ['loss_01']) | |
126 if (stp.score < stp.best_score): | |
127 best = copy.copy(rval) | |
128 rval = best | |
110 return rval | 129 return rval |
111 | 130 |
112 | 131 |
132 import unittest | |
133 | |
134 class TestMLP(unittest.TestCase): | |
135 def test0(self): | |
136 | |
137 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
138 [0, 1, 1], | |
139 [1, 0, 1], | |
140 [1, 1, 1]]), | |
141 {'input':slice(2),'target':2}) | |
142 training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
143 [0, 1, 1], | |
144 [1, 0, 0], | |
145 [1, 1, 1]]), | |
146 {'input':slice(2),'target':2}) | |
147 test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
148 [0, 1, 1], | |
149 [1, 0, 0], | |
150 [1, 1, 1]]), | |
151 {'input':slice(2)}) | |
152 | |
153 learn_algo = MultiLayerPerceptron(2, 10, 2, .1 | |
154 , linker='c&py' | |
155 , early_stopper = lambda:stopper.NStages(100,1)) | |
156 | |
157 model1 = learn_algo(training_set1) | |
158 | |
159 model2 = learn_algo(training_set2) | |
160 | |
161 n_match = 0 | |
162 for o1, o2 in zip(model1(test_data), model2(test_data)): | |
163 #print o1 | |
164 #print o2 | |
165 n_match += (o1 == o2) | |
166 | |
167 assert n_match == (numpy.sum(training_set1.fields()['target'] == | |
168 training_set2.fields()['target'])) | |
169 | |
113 if __name__ == '__main__': | 170 if __name__ == '__main__': |
114 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | 171 unittest.main() |
115 [0, 1, 1], | |
116 [1, 0, 1], | |
117 [1, 1, 1]]), | |
118 {'input':slice(2),'target':2}) | |
119 training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
120 [0, 1, 1], | |
121 [1, 0, 0], | |
122 [1, 1, 1]]), | |
123 {'input':slice(2),'target':2}) | |
124 test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
125 [0, 1, 1], | |
126 [1, 0, 0], | |
127 [1, 1, 1]]), | |
128 {'input':slice(2)}) | |
129 | 172 |
130 learn_algo = NeuralNet(2, 10, 3, .1, 1000) | |
131 | |
132 model1 = learn_algo(training_set1) | |
133 | |
134 model2 = learn_algo(training_set2) | |
135 | |
136 n_match = 0 | |
137 for o1, o2 in zip(model1(test_data), model2(test_data)): | |
138 n_match += (o1 == o2) | |
139 | |
140 print n_match, numpy.sum(training_set1.fields()['target'] == | |
141 training_set2.fields()['target']) | |
142 |
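
A second, smaller change: _Model.__call__ no longer recompiles a Theano function on every application. Compiled functions are memoized in self._fn_cache, keyed on the (inputs, outputs) signature, via the _cache helper. The helper below is copied from the diff; get_compiled and its compile_fn argument are illustrative stand-ins for self.algo._fn / theano.function, not part of the pylearn code.

    def _cache(d, key, valfn):
        # valfn() is only evaluated if key isn't already in d
        if key not in d:
            d[key] = valfn()
        return d[key]

    _fn_cache = {}

    def get_compiled(inputs, outputs, compile_fn):
        # dictionary keys must be hashable, hence the tuples
        return _cache(_fn_cache, (tuple(inputs), tuple(outputs)),
                      lambda: compile_fn(inputs, outputs))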
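
The new AutonameVars base class gives each symbolic variable the name of the local variable that holds it, which is why V_intern can drop the explicit 'input', 'target', 'W1', ... name strings that the old Vars class passed to the tensor constructors. Here is a self-contained sketch of the idiom; Sym and build_vars are hypothetical stand-ins for a Theano variable and the V_intern constructor.

    class AutonameVars(object):
        def __init__(self, dct):
            for key, val in dct.items():
                if type(key) is str and hasattr(val, 'name'):
                    val.name = key
            self.__dict__.update(dct)

    class Sym(object):
        name = None            # stand-in for a Theano variable's name slot

    def build_vars():
        input, target = Sym(), Sym()
        # after this call, v.input.name == 'input' and v.target.name == 'target'
        v = AutonameVars(locals())
        return v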