comparison mlp_factory_approach.py @ 208:bf320808919f

back to James' version
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Fri, 16 May 2008 16:39:01 -0400
parents c5a7105fa40b
children bd728c83faff

--- mlp_factory_approach.py	207:c5a7105fa40b
+++ mlp_factory_approach.py	208:bf320808919f
@@ -1,141 +1,172 @@
-import dataset
+import copy, sys
+import numpy
+
 import theano
-import theano.tensor as t
-import numpy
-import nnet_ops
+from theano import tensor as t
+
+from tlearn import dataset, nnet_ops, stopper
 
 def _randshape(*shape):
     return (numpy.random.rand(*shape) -0.5) * 0.001
-def _function(inputs, outputs, linker='c&py'):
-    return theano.function(inputs, outputs, unpack_single=False,linker=linker)
 
-class NeuralNet(object):
+def _cache(d, key, valfn):
+    #valfn() is only evaluated if key isn't in dictionary d
+    if key not in d:
+        d[key] = valfn()
+    return d[key]
 
-    class Model(object):
-        def __init__(self, nnet, params):
-            self.nnet = nnet
-            self.params = params
+class _Model(object):
+    def __init__(self, algo, params):
+        self.algo = algo
+        self.params = params
+        v = algo.v
+        self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params)
+        self._fn_cache = {}
 
-        def update(self, trainset, stopper=None):
-            """Update this model from more training data."""
-            v = self.nnet.v
-            params = self.params
-            update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params)
-            if stopper is not None:
-                raise NotImplementedError()
-            else:
-                for i in xrange(100):
-                    for input, target in trainset.minibatches(['input', 'target'],
-                            minibatch_size=min(32, len(trainset))):
-                        results = update_fn(input, target[:,0], *params)
-                        if 0: print results[0]
-                # print params['b']
+    def __copy__(self):
+        return _Model(self.algo, [copy.copy(p) for p in params])
 
-        def __call__(self, testset,
-                output_fieldnames=['output_class'],
-                test_stats_collector=None,
-                copy_inputs=False,
-                put_stats_in_output_dataset=True,
-                output_attributes=[]):
-            """Apply this model (as a function) to new data"""
-            inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params
-            fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames])
-            if 'target' in testset.fieldNames():
-                return dataset.ApplyFunctionDataSet(testset,
-                    lambda input, target: fn(input, target[:,0], *self.params),
-                    output_fieldnames)
-            else:
-                return dataset.ApplyFunctionDataSet(testset,
-                    lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params),
-                    output_fieldnames)
+    def update(self, input_target):
+        """Update this model from more training data."""
+        params = self.params
+        #TODO: why should we have to unpack target like this?
+        for input, target in input_target:
+            self.update_fn(input, target[:,0], *params)
 
-    def __init__(self, ninputs, nhid, nclass, lr, nepochs,
-            l2coef=0.0,
-            linker='c&yp',
-            hidden_layer=None):
-        if not hidden_layer:
-            hidden_layer = AffineSigmoidLayer("hidden",ninputs,nhid,l2coef)
-        class Vars:
-            def __init__(self, lr, l2coef):
+    def __call__(self, testset, fieldnames=['output_class']):
+        """Apply this model (as a function) to new data"""
+        #TODO: cache fn between calls
+        assert 'input' == testset.fieldNames()[0]
+        assert len(testset.fieldNames()) <= 2
+        v = self.algo.v
+        outputs = [getattr(v, name) for name in fieldnames]
+        inputs = [v.input] + ([v.target] if 'target' in testset else [])
+        inputs.extend(v.params)
+        theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)),
+                lambda: self.algo._fn(inputs, outputs))
+        lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
+        return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
+
+class AutonameVars(object):
+    def __init__(self, dct):
+        for key, val in dct.items():
+            if type(key) is str and hasattr(val, 'name'):
+                val.name = key
+        self.__dict__.update(dct)
+
+class MultiLayerPerceptron(object):
+
+    def __init__(self, ninputs, nhid, nclass, lr,
+            l2coef=0.0,
+            linker='c&py',
+            hidden_layer=None,
+            early_stopper=None,
+            validation_portion=0.2,
+            V_extern=None):
+        class V_intern(AutonameVars):
+            def __init__(v_self, lr, l2coef, **kwargs):
                 lr = t.constant(lr)
                 l2coef = t.constant(l2coef)
-                input = t.matrix('input') # n_examples x n_inputs
-                target = t.ivector('target') # n_examples x 1
-                W2 = t.matrix('W2')
-                b2 = t.vector('b2')
+                input = t.matrix() # n_examples x n_inputs
+                target = t.ivector() # len: n_examples
+                W2, b2 = t.matrix(), t.vector()
 
-                hid = hidden_layer(input)
-                hid_params = hidden_layer.params()
-                hid_params_init_vals = hidden_layer.params_ivals()
-                hid_regularization = hidden_layer.regularization()
-
+                if hidden_layer:
+                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
+                else:
+                    W1, b1 = t.matrix(), t.vector()
+                    hid = t.tanh(b1 + t.dot(input, W1))
+                    hid_params = [W1, b1]
+                    hid_regularization = l2coef * t.sum(W1*W1)
+                    hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
+
                 params = [W2, b2] + hid_params
-                nll, predictions = nnet_ops.crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target)
+                activations = b2 + t.dot(hid, W2)
+                nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
                 regularization = l2coef * t.sum(W2*W2) + hid_regularization
-                output_class = t.argmax(predictions,1)
+                output_class = t.argmax(activations,1)
                 loss_01 = t.neq(output_class, target)
                 g_params = t.grad(nll + regularization, params)
                 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
-                setattr_and_name(self, locals())
+                self.__dict__.update(locals()); del self.self
+                AutonameVars.__init__(v_self, locals())
         self.nhid = nhid
        self.nclass = nclass
-        self.nepochs = nepochs
-        self.v = Vars(lr, l2coef)
-        self.params = None
+        self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals())
+        self.linker = linker
+        self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1)
+        self.validation_portion = validation_portion
+
+    def _fn(self, inputs, outputs):
+        # Caching here would hamper multi-threaded apps
+        # prefer caching in _Model.__call__
+        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)
 
     def __call__(self, trainset=None, iparams=None):
+        """Allocate and optionally train a model"""
         if iparams is None:
-            iparams = LookupList(["W","b"],[_randshape(self.nhid, self.nclass), _randshape(self.nclass)])
-                    + self.v.hid_params_init_vals()
-        rval = NeuralNet.Model(self, iparams)
+            iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
+                    + self.v.hid_ivals()
+        rval = _Model(self, iparams)
         if trainset:
-            rval.update(trainset)
+            if len(trainset) == sys.maxint:
+                raise NotImplementedError('Learning from infinite streams is not supported')
+            nval = int(self.validation_portion * len(trainset))
+            nmin = len(trainset) - nval
+            assert nmin >= 0
+            minset = trainset[:nmin] #real training set for minimizing loss
+            valset = trainset[nmin:] #validation set for early stopping
+            best = rval
+            for stp in self.early_stopper():
+                rval.update(
+                    trainset.minibatches(['input', 'target'], minibatch_size=min(32,
+                        len(trainset))))
+                if stp.set_score:
+                    stp.score = rval(valset, ['loss_01'])
+                    if (stp.score < stp.best_score):
+                        best = copy.copy(rval)
+            rval = best
        return rval
 
 
-def setattr_and_name(self, dict):
-    """This will do a self.__setattr__ for all elements in the dict
-    (except for element self). In addition it will make sure that
-    each element's .name (if it exists) is set to the element's key
-    in the dicitonary.
-    Typical usage: setattr_and_name(self, locals()) """
-    for varname,var in locals.items():
-        if var is not self:
-            if hasattr(var,"name") and not var.name:
-                var.name=varname
-            self.__setattr__(varname,var)
+import unittest
 
+class TestMLP(unittest.TestCase):
+    def test0(self):
+
+        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                          [0, 1, 1],
+                                                          [1, 0, 1],
+                                                          [1, 1, 1]]),
+                                             {'input':slice(2),'target':2})
+        training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                          [0, 1, 1],
+                                                          [1, 0, 0],
+                                                          [1, 1, 1]]),
+                                             {'input':slice(2),'target':2})
+        test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                      [0, 1, 1],
+                                                      [1, 0, 0],
+                                                      [1, 1, 1]]),
+                                         {'input':slice(2)})
+
+        learn_algo = MultiLayerPerceptron(2, 10, 2, .1
+                , linker='c&py'
+                , early_stopper = lambda:stopper.NStages(100,1))
+
+        model1 = learn_algo(training_set1)
+
+        model2 = learn_algo(training_set2)
+
+        n_match = 0
+        for o1, o2 in zip(model1(test_data), model2(test_data)):
+            #print o1
+            #print o2
+            n_match += (o1 == o2)
+
+        assert n_match == (numpy.sum(training_set1.fields()['target'] ==
+                training_set2.fields()['target']))
 
 if __name__ == '__main__':
-    training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                      [0, 1, 1],
-                                                      [1, 0, 1],
-                                                      [1, 1, 1]]),
-                                         {'input':slice(2),'target':2})
-    training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                      [0, 1, 1],
-                                                      [1, 0, 0],
-                                                      [1, 1, 1]]),
-                                         {'input':slice(2),'target':2})
-    test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                  [0, 1, 1],
-                                                  [1, 0, 0],
-                                                  [1, 1, 1]]),
-                                     {'input':slice(2)})
+    unittest.main()
 
-
-    learn_algo = NeuralNet(2, 10, 3, .1, 1000)
-
-    model = learn_algo()
-
-    model1 = learn_algo(training_set1)
-
-    model2 = learn_algo(training_set2)
-
-    n_match = 0
-    for o1, o2 in zip(model1(test_data), model2(test_data)):
-        n_match += (o1 == o2)
-
-    print n_match, numpy.sum(training_set1.fields()['target'] ==
-        training_set2.fields()['target'])
-
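
For orientation, a minimal usage sketch of the factory-style API on the new (208) side of this comparison, mirroring TestMLP.test0 above. The module path for MultiLayerPerceptron and the tlearn package layout (dataset, stopper) are assumptions about how the code is packaged, and the toy data is illustrative, not part of the changeset.

    import numpy
    from tlearn import dataset, stopper
    from mlp_factory_approach import MultiLayerPerceptron   # assumed module path

    # A small ArrayDataSet with two input columns and one integer target column,
    # laid out the same way as in TestMLP.test0 (values are illustrative).
    train = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                              [0, 1, 1],
                                              [1, 0, 1],
                                              [1, 1, 1]]),
                                 {'input': slice(2), 'target': 2})

    # The learning algorithm is a factory: calling it on a trainset allocates
    # parameters, trains with early stopping, and returns a _Model instance.
    learn_algo = MultiLayerPerceptron(2, 10, 2, .1,
                                      linker='c&py',
                                      early_stopper=lambda: stopper.NStages(100, 1))
    model = learn_algo(train)

    # The model is itself callable: applying it to a dataset returns an
    # ApplyFunctionDataSet with the requested fields (default: 'output_class').
    for output_class in model(train):
        print output_class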