comparison doc/v2_planning/plugin_PL.py @ 1253:826d78f0135f

Prototype for "hooks" simpler than full control-flow rewrite.
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Fri, 24 Sep 2010 01:46:12 -0400
import numpy
import theano
import theano.tensor
from itertools import izip

# Model, Learner, Optimizer, MNIST and PCA are assumed to be provided
# elsewhere by the framework this prototype is sketching.


class RBM(Model):
    '''
    Restricted Boltzmann Machine.
    '''
    def __init__(self, n_visible, n_hidden, visible=None, name=None):

        if name is None:
            self.__name__ = self.__class__.__name__
        else:
            self.__name__ = name
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if visible is None:
            self.visible = theano.tensor.matrix(
                name='.'.join([self.__name__, 'visible']))
        else:
            self.visible = visible

        self.W = theano.shared(
            numpy.zeros((n_visible, n_hidden), dtype=theano.config.floatX),
            name=self.__name__ + '.W')
        self.b_hid = theano.shared(
            numpy.zeros((n_hidden,), dtype=theano.config.floatX),
            name=self.__name__ + '.b_hid')
        # The visible bias has one entry per visible unit.
        self.b_vis = theano.shared(
            numpy.zeros((n_visible,), dtype=theano.config.floatX),
            name=self.__name__ + '.b_vis')

        self.inputs = [self.visible]
        self.targets = []
        self.parameters = [self.W, self.b_hid, self.b_vis]
        self.outputs = ...
        self.cost = None
        self.gradients = [...]

class LogisticRegression(Model):
    pass

class GradientBasedLearner(Learner):
    '''
    Learner that uses a gradient-based Optimizer to train a Model.
    '''
    def __init__(self, model, optimizer, name=None):
        self.model = model
        self.optimizer = optimizer
        ...

        self.updates = optimizer.iterative_optimizer(
            parameters=model.parameters,
            cost=model.cost)

        # TODO: not sure of how to interface data set with the function
        self.train_fn = theano.function(model.inputs + model.targets,
            model.cost, updates=self.updates)

    def use_dataset(self, dataset):
        self.train_set = dataset

    # The decorator indicates that this function will declare some hooks.
    # More hooks could be automatically declared, for instance
    # 'begin_function' and 'end_function'
    @declare_hooks(['begin_train_iter', 'end_train_iter'])
    def adapt(self, n_steps=1):
        for i in xrange(n_steps):
            self.adapt.hooks.execute(
                'begin_train_iter',
                context=dict(iter=i, total=n_steps, locals=locals()))

            data = self.train_set.next()
            self.train_fn(data)

            self.adapt.hooks.execute(
                'end_train_iter',
                context=dict(iter=i, locals=locals()))


class SGD(Optimizer):
    '''
    Stochastic gradient descent with fixed learning rate.
    '''
    def __init__(self, step_size):
        self.step_size = step_size

    def iterative_optimizer(
            self,
            parameters,
            cost=None,
            gradients=None,
            stop=None,
            updates=None,
            ):

        if updates is not None:
            ret = updates
        else:
            ret = {}

        if gradients is None:
            if cost is None:
                raise ValueError('SGD needs to be provided either a cost or a gradients list')
            gradients = theano.tensor.grad(cost, parameters)

        for p, g in izip(parameters, gradients):
            if p in ret:
                raise KeyError('Parameter %s already has an update value (%s)' % (p, g))
            ret[p] = p - self.step_size * g

        # never stop
        if stop is not None:
            ret[stop] = False

        return ret


class DBN(Learner):
    '''
    Deep Belief Network.
    '''
    def __init__(self, n_layers, layer_config, n_ft_steps, ft_step_size):
        # Layers are GradientBasedLearners, with DBN as Model
        self.layers = []
        # Pretraining cumulative schedule
        self.pt_cumul_schedule = [0]
        # Build the layers and the fully-connected model
        self.input = theano.tensor.matrix(name='.'.join([self.__name__, 'input']))
        self.output = self.input
        self.ft_params = []
        for i, lconf in enumerate(layer_config):
            rbm = RBM(visible=self.output, ...)
            self.output = rbm.hidden_expectation
            layer = GradientBasedLearner(...)
            self.layers.append(layer)

            self.pt_cumul_schedule.append(
                self.pt_cumul_schedule[-1] + lconf.n_pretrain_steps)

            self.ft_params.extend([rbm.W, rbm.b_hid])

        # Build the fine-tunable model
        self.target = theano.tensor.ivector(name='.'.join([self.__name__, 'target']))
        logreg = LogisticRegression(...)
        self.output = logreg.output
        self.cost = logreg.nll
        self.ft_params.extend([logreg.W, logreg.b])

        ft_optimizer = SGD(ft_step_size)
        self.ft_updates = ft_optimizer.iterative_optimizer(
            parameters=self.ft_params,
            cost=self.cost)
        self.ft_fn = theano.function(
            [self.input, self.target],
            self.cost,
            updates=self.ft_updates,
            name='.'.join([self.__name__, 'ft_fn']))

        self.stage = 0

    @declare_hooks([
        'begin_pretrain_layer', 'end_pretrain_layer',
        'begin_finetune_iter', 'end_finetune_iter'])
    def adapt(self, n_steps=1):
        '''
        Each "step" is accomplished by the corresponding Learner (either an RBM,
        or the global NNet).
        '''
        train_x, train_y = self.dataset
        n_remaining_steps = n_steps
        # Unsupervised pre-training
        for i, layer in enumerate(self.layers):
            if (self.pt_cumul_schedule[i] <= self.stage
                    and self.stage < self.pt_cumul_schedule[i + 1]):

                self.adapt.hooks.execute(
                    'begin_pretrain_layer',
                    context=dict(iter=i, total=len(self.layers), locals=locals()))

                n_pt_steps = min(n_remaining_steps,
                                 self.pt_cumul_schedule[i + 1] - self.stage)
                layer.use_dataset(train_x)
                layer.adapt(n_steps=n_pt_steps)
                self.stage += n_pt_steps
                n_remaining_steps -= n_pt_steps

                self.adapt.hooks.execute(
                    'end_pretrain_layer',
                    context=dict(iter=i, total=len(self.layers), locals=locals()))

            # For the next layer, the data needs to be preprocessed
            train_x = layer.compute_Eh_given_v(train_x)  # or just compute_output?

        # Supervised fine-tuning
        if n_remaining_steps > 0:
            sup_data = train_x, train_y
            for i in xrange(n_remaining_steps):
                self.adapt.hooks.execute(
                    'begin_finetune_iter',
                    context=dict(iter=i, total=n_steps, locals=locals()))

                self.ft_fn(*sup_data)

                self.adapt.hooks.execute(
                    'end_finetune_iter',
                    context=dict(iter=i, locals=locals()))


## TODO: implement k-fold cross-validation

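# A minimal sketch of how k-fold cross-validation might sit on top of this
# prototype (not part of the original design).  `make_learner` (a factory
# returning a fresh Learner) and `evaluate` (scoring a trained Learner on
# held-out data) are hypothetical helpers; the data is assumed to be plain
# numpy arrays.
def k_fold_scores(make_learner, x, y, k=5, n_steps=1000):
    n = len(x)
    fold_size = n // k
    scores = []
    for fold in xrange(k):
        start, stop = fold * fold_size, (fold + 1) * fold_size
        # The current fold is held out for validation, the rest is for training.
        valid_x, valid_y = x[start:stop], y[start:stop]
        train_x = numpy.concatenate([x[:start], x[stop:]])
        train_y = numpy.concatenate([y[:start], y[stop:]])

        learner = make_learner()
        learner.use_dataset((train_x, train_y))
        learner.adapt(n_steps=n_steps)
        scores.append(evaluate(learner, valid_x, valid_y))
    return scores
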
class Hooks:
    def __init__(self):
        # The DB consists of a dictionary:
        # the keys are the hooks' names (as strings),
        # the values are lists of (function, exec_condition) pairs
        self.db = {}

    def declare(self, name):
        if name in self.db:
            raise KeyError('Hook "%s" is already declared' % name)
        self.db[name] = []

    def execute(self, name, context):
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        # TODO: add contextual information to context, like current time, time of last call, ...
        for fn, cond in self.db[name]:
            if cond(**context):
                fn(**context)

    def register(self, name, function, exec_condition):
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        self.db[name].append((function, exec_condition))

    # TODO: add __getattr__ to have more intuitive access to the hooks

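# One possible shape for the __getattr__ mentioned in the TODO above (a sketch,
# not part of the prototype): it would let callers write hooks.begin_train_iter
# instead of indexing the internal db by string.
class AttrHooks(Hooks):
    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, so the regular
        # methods and the `db` attribute are unaffected.
        try:
            return self.db[name]
        except KeyError:
            raise AttributeError(name)
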
# Hook declaration mechanism
def declare_hooks(hooks_list):
    def deco(f):
        f.hooks = Hooks()
        for hook_name in hooks_list:
            f.hooks.declare(hook_name)
        return f

    return deco

# Conditions
def always():
    return lambda *args, **kwargs: True

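# Another condition factory in the same spirit as always() (a sketch, not part
# of the original prototype): fire only on every n-th iteration, using the
# `iter` key that adapt() places in the hook context.
def every_n(n):
    return lambda iter, **kwargs: iter % n == 0
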
def main():
    train_data = MNIST.gettrain()
    test_data = MNIST.gettest()

    train_x, train_y = train_data

    preprocessor = PCA(ndim=80)
    preprocessor.train(train_x)

    preprocessed_x = preprocessor.compute_output(train_x)
    ## for more robustness, we can have something like:
    # preprocessed_x = ProcessDataSet(orig_data=train_x, function=preprocessor.compute_output)

    x = theano.tensor.matrix()
    dbn = DBN(
        input=x,
        n_layers=3,
        layer_config=[dict(n_hidden=500, n_unsup_steps=1000)] * 3
        )

    dbn.layers[0].adapt.hooks.register(
        'begin_train_iter',
        function=...,
        exec_condition=always()
        )

    dbn.layers[0].adapt.hooks.register(
        'end_train_iter',
        function=...,
        exec_condition=lambda iter, **kwargs: iter % 20 == 0
        )


if __name__ == '__main__':
    main()