Mercurial > pylearn
comparison pylearn/algorithms/daa.py @ 633:e242c12eb30d
merged
author | desjagui@atchoum.iro.umontreal.ca |
---|---|
date | Wed, 21 Jan 2009 03:23:50 -0500 |
parents | b054271b2504 |
children | 89bc88affef0 |
comparison
equal
deleted
inserted
replaced
632:a11b7fbf3171 | 633:e242c12eb30d |
---|---|
1 | |
2 import theano | 1 import theano |
3 from theano import tensor as T | 2 from theano import tensor as T |
4 from theano.tensor import nnet as NN | 3 from theano.tensor import nnet as NN |
4 from hpu.conv import sp | |
5 import numpy as N | 5 import numpy as N |
6 | 6 from theano.printing import Print |
7 from pylearn import cost as cost | 7 |
8 | 8 from pylearn.algorithms import cost |
9 class DenoisingAA(T.RModule): | 9 |
10 # TODO: make this more generic (somewhere in pylearn) | |
def lnorm(param, type='L2'):
    """Return a symbolic L1 or L2 penalty for a single parameter.

    :param param: parameter to penalise; it is passed to T.sum, so it is
        presumably a Theano tensor variable (TODO confirm).
    :param type: 'L1' for the sum of absolute values, 'L2' for the sum of
        squares (no square root is taken).

    :raises NotImplementedError: for any other value of `type`.
    """
    if type == 'L1':
        # Use the builtin abs(): it dispatches to the operand's __abs__, so it
        # does not depend on the tensor module exporting a function named
        # `abs` (Theano names its elementwise op `abs_`).
        return T.sum(abs(param))
    if type == 'L2':
        return T.sum(param*param)
    raise NotImplementedError('Only L1 and L2 regularization are currently implemented')
17 | |
def get_reg_cost(params, type):
    """Add up the lnorm penalty of the given `type` over every entry of `params`.

    The accumulation starts from the integer 0, so an empty `params` yields 0.
    """
    # sum() starts at 0 and adds left to right, exactly like an explicit loop.
    return sum((lnorm(p, type) for p in params), 0)
23 | |
class ScratchPad:
    """Empty attribute bag.

    DAA.init_behavioural creates one instance for the clean pass and one for
    the noisy pass; the define_* methods then attach their symbolic results
    (hidden, output, cost, ...) to it as plain attributes.
    """
26 | |
27 class DAA(T.RModule): | |
10 """De-noising Auto-encoder | 28 """De-noising Auto-encoder |
11 | |
12 WRITEME | |
13 | |
14 Abstract base class. Requires subclass with functions: | |
15 | |
16 - build_corrupted_input() | |
17 | |
18 Introductory article about this model WRITEME. | |
19 | |
20 | |
21 """ | 29 """ |
22 | 30 |
23 def __init__(self, input = None, regularize = True, tie_weights = True, | 31 # TODO: change n_hid_per_pixel to nkern |
24 activation_function=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy): | 32 def __init__(self, img_shape, n_hid_per_pixel, |
33 batch_size=4, regularize = True, tie_weights = False, | |
34 hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init): | |
25 """ | 35 """ |
26 :param input: WRITEME | |
27 | |
28 :param regularize: WRITEME | 36 :param regularize: WRITEME |
29 | |
30 :param tie_weights: WRITEME | 37 :param tie_weights: WRITEME |
31 | 38 :param hid_fn: WRITEME |
32 :param activation_function: WRITEME | |
33 | |
34 :param reconstruction_cost: Should return one cost per example (row) | 39 :param reconstruction_cost: Should return one cost per example (row) |
35 | |
36 :todo: Default noise level for all daa levels | 40 :todo: Default noise level for all daa levels |
37 | 41 |
38 """ | 42 """ |
39 super(DenoisingAA, self).__init__() | 43 super(DAA, self).__init__() |
40 | 44 |
41 # MODEL CONFIGURATION | 45 # MODEL CONFIGURATION |
46 self.img_shape = img_shape | |
47 self.input_size = N.prod(img_shape) | |
48 self.n_hid_per_pixel = n_hid_per_pixel | |
49 self.batch_size = batch_size | |
42 self.regularize = regularize | 50 self.regularize = regularize |
43 self.tie_weights = tie_weights | 51 self.tie_weights = tie_weights |
44 self.activation_function = activation_function | 52 self.hid_fn = hid_fn |
45 self.reconstruction_cost_function = reconstruction_cost_function | 53 self.reconstruction_cost_function = reconstruction_cost_function |
46 | 54 |
47 # ACQUIRE/MAKE INPUT | 55 ### DECLARE MODEL VARIABLES |
48 if not input: | 56 self.input = theano.External(T.dmatrix('input')) |
49 input = T.matrix('input') | 57 |
50 self.input = theano.External(input) | 58 #parameters |
51 | 59 self.w1 = theano.Member(T.dmatrix()) |
52 # HYPER-PARAMETERS | 60 self.w2 = self.w1.T if tie_weights else theano.Member(T.dmatrix()) |
61 self.b1 = theano.Member(T.dvector()) | |
62 self.b2 = theano.Member(T.dvector()) | |
63 | |
64 #hyper-parameters | |
53 self.lr = theano.Member(T.scalar()) | 65 self.lr = theano.Member(T.scalar()) |
54 | 66 |
55 # PARAMETERS | 67 |
56 self.w1 = theano.Member(T.matrix()) | 68 ### BEHAVIOURAL MODEL |
57 if not tie_weights: | 69 def init_behavioural(self): |
58 self.w2 = theano.Member(T.matrix()) | 70 self.noisy_input = self.corrupt_input() |
71 self.noise = ScratchPad() | |
72 self.clean = ScratchPad() | |
73 self.define_behavioural(self.clean, self.input) | |
74 self.define_behavioural(self.noise, self.noisy_input) | |
75 self.define_regularization() # call before cost | |
76 self.define_cost(self.clean) | |
77 self.define_cost(self.noise) | |
78 self.define_gradients() | |
79 self.define_interface() | |
80 | |
81 | |
def define_behavioural(self, container, input):
    """Attach the encoder and then the decoder expressions to `container`.

    :param container: object that receives the results as attributes.
    :param input: expression handed to define_propup as the encoder input.
    """
    # Encoder first: define_propdown is declared as depending on define_propup.
    self.define_propup(container, input)
    self.define_propdown(container)
85 | |
def define_propup(self, container, input):
    """Store the hidden pre-activation and hidden code on `container`.

    Sets container.hidden_activation = dot(input, w1) + b1 and
    container.hidden = hid_fn(container.hidden_activation).
    """
    pre_activation = T.dot(input, self.w1) + self.b1
    container.hidden_activation = pre_activation
    container.hidden = self.hid_fn(pre_activation)
89 | |
# DEPENDENCY: define_propup
def define_propdown(self, container):
    """Store the reconstruction pre-activation and reconstruction on `container`.

    Reads container.hidden, so define_propup must already have run on the same
    container. The reconstruction goes through the same hid_fn as the hidden
    layer.
    """
    pre_activation = T.dot(container.hidden, self.w2) + self.b2
    container.output_activation = pre_activation
    container.output = self.hid_fn(pre_activation)
94 | |
95 | |
# TODO: fix regularization type (outside parameter ?)
def define_regularization(self, regtype=None):
    """Define self.regularization, the penalty added to each container's cost.

    :param regtype: None for no penalty (self.regularization becomes a
        symbolic zero and no reg_coef member is declared); otherwise the norm
        name handed to get_reg_cost ('L1' or 'L2').

    With a norm name, self.reg_coef is declared as a scalar Member and the
    penalty is reg_coef times the summed norm of w1 and w2.
    """
    # Identity test: None is a singleton, and `==` may be overloaded.
    if regtype is None:
        self.regularization = T.zero() # base model has no regularization!
        return
    self.reg_coef = theano.Member(T.scalar())
    # Honour the requested norm instead of always using 'L2'.
    self.regularization = self.reg_coef * get_reg_cost([self.w1, self.w2], regtype)
103 | |
104 | |
# DEPENDENCY: define_behavioural, define_regularization
def define_cost(self, container):
    """Store the reconstruction cost and the total cost on `container`.

    container.reconstruction_cost is self.reconstruction_costs applied to
    container.output. container.cost is that value, plus self.regularization
    when self.regularize is true.
    """
    recon = self.reconstruction_costs(container.output)
    container.reconstruction_cost = recon
    # TOTAL COST
    container.cost = recon
    if self.regularize:
        container.cost = recon + self.regularization
112 | |
113 | |
114 # DEPENDENCY: define_cost | |
115 def define_gradients(self): | |
116 if not hasattr(self,'params'): | |
117 self.params = [] | |
118 if self.tie_weights: | |
119 self.params += [self.w1, self.b1, self.b2] | |
59 else: | 120 else: |
60 self.w2 = self.w1.T | 121 self.params += [self.w1, self.w2, self.b1, self.b2] |
61 self.b1 = theano.Member(T.vector()) | 122 |
62 self.b2 = theano.Member(T.vector()) | 123 self.gradients = T.grad(self.noise.cost, self.params) |
63 | 124 self.updates = dict((p, p - self.lr * g) for p, g in \ |
64 | 125 zip(self.params, self.gradients)) |
65 # REGULARIZATION COST | 126 |
66 self.regularization = self.build_regularization() | 127 |
67 | 128 # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients |
68 | 129 def define_interface(self): |
69 ### NOISELESS ### | 130 self.update = theano.Method(self.input, self.noise.cost, self.updates) |
70 | 131 self.compute_cost = theano.Method(self.input, self.clean.cost) |
71 # HIDDEN LAYER | 132 self.noisify = theano.Method(self.input, self.noisy_input) |
72 self.hidden_activation = T.dot(self.input, self.w1) + self.b1 | 133 self.reconstruction = theano.Method(self.input, self.clean.output) |
73 self.hidden = self.hid_activation_function(self.hidden_activation) | 134 self.representation = theano.Method(self.input, self.clean.hidden) |
74 | 135 self.reconstruction_through_noise = theano.Method(self.input,\ |
75 # RECONSTRUCTION LAYER | 136 [self.noisy_input, self.noise.output]) |
76 self.output_activation = T.dot(self.hidden, self.w2) + self.b2 | 137 self.validate = theano.Method(self.input, [self.clean.cost, self.clean.output]) |
77 self.output = self.out_activation_function(self.output_activation) | 138 |
78 | 139 |
79 # RECONSTRUCTION COST | 140 def corrupt_input(self): |
80 self.reconstruction_costs = self.build_reconstruction_costs(self.output) | 141 self.noise_level = theano.Member(T.scalar()) |
81 self.reconstruction_cost = T.mean(self.reconstruction_costs) | 142 return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input |
82 | 143 |
83 # TOTAL COST | 144 # what about filter_scale ? |
84 self.cost = self.reconstruction_cost | 145 def _instance_initialize(self, obj, lr=None, seed=1, alloc=True, **init): |
85 if self.regularize: | 146 |
86 self.cost = self.cost + self.regularization | 147 init.setdefault('reg_coef', 0) |
87 | 148 init.setdefault('noise_level', 0) |
88 | 149 obj.lr = lr |
89 ### WITH NOISE ### | 150 |
90 self.corrupted_input = self.build_corrupted_input() | 151 super(DAA, self)._instance_initialize(obj, **init) |
91 | 152 |
92 # HIDDEN LAYER | 153 self.R = N.random.RandomState(seed) if seed is not None else N.random |
93 self.nhidden_activation = T.dot(self.corrupted_input, self.w1) + self.b1 | |
94 self.nhidden = self.hid_activation_function(self.nhidden_activation) | |
95 | |
96 # RECONSTRUCTION LAYER | |
97 self.noutput_activation = T.dot(self.nhidden, self.w2) + self.b2 | |
98 self.noutput = self.out_activation_function(self.noutput_activation) | |
99 | |
100 # RECONSTRUCTION COST | |
101 self.nreconstruction_costs = self.build_reconstruction_costs(self.noutput) | |
102 self.nreconstruction_cost = T.mean(self.nreconstruction_costs) | |
103 | |
104 # TOTAL COST | |
105 self.ncost = self.nreconstruction_cost | |
106 if self.regularize: | |
107 self.ncost = self.ncost + self.regularization | |
108 | |
109 | |
110 # GRADIENTS AND UPDATES | |
111 if self.tie_weights: | |
112 self.params = self.w1, self.b1, self.b2 | |
113 else: | |
114 self.params = self.w1, self.w2, self.b1, self.b2 | |
115 gradients = T.grad(self.ncost, self.params) | |
116 updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) | |
117 | |
118 # INTERFACE METHODS | |
119 self.update = theano.Method(self.input, self.ncost, updates) | |
120 self.compute_cost = theano.Method(self.input, self.cost) | |
121 self.noisify = theano.Method(self.input, self.corrupted_input) | |
122 self.reconstruction = theano.Method(self.input, self.output) | |
123 self.representation = theano.Method(self.input, self.hidden) | |
124 self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput]) | |
125 | |
126 self.validate = theano.Method(self.input, [self.cost, self.output]) | |
127 | |
128 def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): | |
129 if (input_size is None) ^ (hidden_size is None): | |
130 raise ValueError("Must specify input_size and hidden_size or neither.") | |
131 super(DenoisingAA, self)._instance_initialize(obj, **init) | |
132 if seed is not None: | |
133 R = N.random.RandomState(seed) | |
134 else: | |
135 R = N.random | |
136 if input_size is not None: | |
137 sz = (input_size, hidden_size) | |
138 inf = 1/N.sqrt(input_size) | |
139 hif = 1/N.sqrt(hidden_size) | |
140 obj.w1 = R.uniform(size = sz, low = -inf, high = inf) | |
141 if not self.tie_weights: | |
142 obj.w2 = R.uniform(size = list(reversed(sz)), low = -hif, high = hif) | |
143 obj.b1 = N.zeros(hidden_size) | |
144 obj.b2 = N.zeros(input_size) | |
145 if seed is not None: | 154 if seed is not None: |
146 obj.seed(seed) | 155 obj.seed(seed) |
147 obj.__hide__ = ['params'] | 156 obj.__hide__ = ['params'] |
148 | 157 |
149 def build_regularization(self): | 158 self.inf = 1/N.sqrt(self.input_size) |
150 """ | 159 self.hif = 1/N.sqrt(self.n_hid_per_pixel) |
151 @todo: Why do we need this function? | 160 |
152 """ | 161 if alloc: |
153 return T.zero() # no regularization! | 162 w1shp = (self.input_size, self.n_hid_per_pixel) |
154 | 163 w2shp = list(reversed(w1shp)) |
155 | 164 |
156 class SigmoidXEDenoisingAA(DenoisingAA): | 165 obj.w1 = self.R.uniform(size=w1shp, low = -self.inf, high = self.inf) |
157 """ | 166 if not self.tie_weights: |
158 @todo: Merge this into the above. | 167 obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif) |
159 @todo: Default noise level for all daa levels | 168 |
160 """ | 169 obj.b1 = N.zeros(self.n_hid_per_pixel) |
161 | 170 obj.b2 = N.zeros(self.input_size) |
162 def build_corrupted_input(self): | 171 |
163 self.noise_level = theano.Member(T.scalar()) | 172 |
164 return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input | 173 |
165 | 174 #TODO: these should be made generic |
166 def hid_activation_function(self, activation): | 175 ############## HELPER FUNCTIONS ##################### |
167 return self.activation_function(activation) | 176 def reconstruction_costs(self, output): |
168 | |
169 def out_activation_function(self, activation): | |
170 return self.activation_function(activation) | |
171 | |
172 def build_reconstruction_costs(self, output): | |
173 return self.reconstruction_cost_function(self.input, output) | 177 return self.reconstruction_cost_function(self.input, output) |
174 | 178 |
175 def build_regularization(self): | 179 |
176 self.l2_coef = theano.Member(T.scalar()) | 180 ############################################## |
177 if self.tie_weights: | 181 # QUADRATIC DAA # |
178 return self.l2_coef * T.sum(self.w1 * self.w1) | 182 ############################################## |
class QuadraticDAA(DAA):
    """DAA whose hidden pre-activation also sums squared filter responses.

    With n_quadratic_filters > 0 the encoder computes
    dot(input, w1) + b1 + sum_k dot(input, qfilters[k])**2 before applying
    hid_fn; with 0 filters it is the plain DAA encoder. Weights are never
    tied: tie_weights is passed as False to the base constructor.
    """

    def __init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters=0,
            batch_size=4, regularize = True, hid_fn=NN.sigmoid,
            reconstruction_cost_function=cost.cross_entropy, **init):
        """Declare the base DAA members plus one matrix Member per quadratic filter.

        Every argument except n_quadratic_filters is forwarded to DAA.__init__.
        """
        # set tied-weights to False for QDAAs
        super(QuadraticDAA, self).__init__(img_shape, n_hid_per_pixel, batch_size, regularize,
                False, hid_fn, reconstruction_cost_function, **init)

        self.n_quadratic_filters = n_quadratic_filters
        self.qfilters = [theano.Member(T.dmatrix())
                for i in xrange(n_quadratic_filters)]

    # TODO: verify with James that the formula is correct (without sqrt)
    def define_propup(self, container, input):
        """Store the hidden pre-activation and hidden code on `container`."""
        if self.n_quadratic_filters:
            qsum = 0
            for qf in self.qfilters:
                qsum = qsum + T.dot(input, qf)**2
            container.hidden_activation = T.dot(input, self.w1) + self.b1 + qsum
        else:
            container.hidden_activation = T.dot(input, self.w1) + self.b1
        container.hidden = self.hid_fn(container.hidden_activation)

    def define_gradients(self):
        """Put the quadratic filters first in self.params, then defer to DAA."""
        # Copy the list: DAA.define_gradients grows self.params with `+=`,
        # which extends a list in place. Sharing the object would append
        # w1/w2/b1/b2 to self.qfilters as well.
        self.params = list(self.qfilters)
        DAA.define_gradients(self)

    def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
        """Initialise the quadratic filters of `obj`.

        Each filter has the shape of obj.w1, is drawn uniformly from
        [-self.inf, self.inf] with self.R, and is scaled by qfilter_relscale.
        self.R and self.inf are set by DAA._instance_initialize.
        """
        # only call constructor of base-class if we are instantiating QuadraticDAA
        # (subclasses call DAA._instance_initialize themselves before this)
        if self.__class__ == QuadraticDAA:
            super(QuadraticDAA, self)._instance_initialize(obj, lr, seed, **init)
        obj.qfilters = [self.R.uniform(size=obj.w1.shape, low=-self.inf, high=self.inf) *
                qfilter_relscale for qf in self.qfilters]
218 | |
219 | |
220 | |
221 ############################################## | |
222 # SPARSE QUADRATIC DAA # | |
223 ############################################## | |
class SparseQuadraticDAA(QuadraticDAA):
    """QuadraticDAA whose encoder applies its filters through sp.applySparseFilter.

    The linear weights and the quadratic filters are declared as vectors; the
    decoder weight w2 stays a dense matrix.
    """

    def __init__(self, img_shape, n_hid_per_pixel,
            filter_shape, step_size=(1,1), conv_mode='valid',
            n_quadratic_filters=0, batch_size=4,
            regularize = True, hid_fn=NN.sigmoid,
            reconstruction_cost_function=cost.cross_entropy, **init):
        """Declare the members and record the filter geometry.

        filter_shape, step_size and conv_mode are stored and later passed
        unchanged to sp.applySparseFilter; the remaining arguments go to
        QuadraticDAA.__init__.
        """
        QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters,
                batch_size, regularize, hid_fn, reconstruction_cost_function, **init)

        # need to override parameters for sparse operations (vector instead of matrix)
        self.w1 = theano.Member(T.dvector())
        self.w2 = theano.Member(T.dmatrix())
        self.qfilters = [theano.Member(T.dvector()) for i in xrange(n_quadratic_filters)]

        self.filter_shape = filter_shape
        self.step_size = step_size
        self.conv_mode = conv_mode

    def define_propup(self, container, input):
        """Store the hidden pre-activation and hidden code on `container`.

        Also records self.hid_shape (second value returned by
        sp.applySparseFilter) and self.nl1feats (its product).
        """
        # Encode the `input` argument rather than self.input, so that the
        # noisy container is really built from the corrupted input.
        lin_hid_activ, self.hid_shape = sp.applySparseFilter(
                self.w1, self.filter_shape, self.n_hid_per_pixel,
                input, self.img_shape, self.step_size, self.conv_mode)
        self.nl1feats = N.prod(self.hid_shape)

        # apply quadratic filters
        qsum = 0
        for qf in self.qfilters:
            temp, hidshape = sp.applySparseFilter(qf, self.filter_shape,
                    self.n_hid_per_pixel, input, self.img_shape,
                    self.step_size, self.conv_mode)
            qsum = qsum + temp**2

        # Skip the addition entirely when there are no quadratic filters.
        if self.n_quadratic_filters:
            hid_activ = lin_hid_activ + qsum
        else:
            hid_activ = lin_hid_activ

        container.hidden_activation = hid_activ
        container.hidden = self.hid_fn(container.hidden_activation)

    def define_propdown(self, container):
        """Deliberately define nothing for the decoder.

        NOTE(review): container.output is therefore never set by this class,
        while DAA.define_cost reads it -- confirm how the reconstruction is
        meant to be built.
        """
        pass

    def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
        """Initialise `obj` with weights shaped for the sparse encoder.

        Reads self.hid_shape, which is only set by define_propup, so the
        behavioural graph must have been defined before this runs.
        """
        # Base initialisation without allocating the dense weights.
        DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init)

        # override weight initialization
        w1shp = N.prod(self.hid_shape)*N.prod(self.filter_shape)
        w2shp = (N.prod(self.hid_shape), self.input_size)
        obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf)
        obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
        # NOTE(review): b1 gets the length of the flat w1 vector, not the
        # number of hidden units -- confirm this is intended.
        obj.b1 = N.zeros(w1shp)
        obj.b2 = N.zeros(w2shp[1])

        QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init)
283 | |
284 | |
285 | |
286 ############################################## | |
287 # CONVOLUTIONAL QUADRATIC DAA # | |
288 ############################################## | |
class ConvQuadraticDAA(QuadraticDAA):
    """QuadraticDAA whose encoder applies its filters through sp.convolve.

    w1, b1 and the quadratic filters are declared as matrices; the decoder
    weight w2 is a dense matrix.
    """

    def __init__(self, img_shape, n_hid_per_pixel,
            filter_shape, step_size=(1,1), conv_mode='valid',
            n_quadratic_filters=0, batch_size=4,
            regularize = True, hid_fn=NN.sigmoid,
            reconstruction_cost_function=cost.cross_entropy, **init):
        """Declare the members and record the filter geometry.

        filter_shape, step_size and conv_mode are stored and later passed
        unchanged to sp.convolve; the remaining arguments go to
        QuadraticDAA.__init__.
        """
        QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters,
                batch_size, regularize, hid_fn, reconstruction_cost_function, **init)

        # override the parameter declarations for the convolutional encoder
        # (here b1 is a matrix as well)
        self.w1 = theano.Member(T.dmatrix())
        self.w2 = theano.Member(T.dmatrix())
        self.b1 = theano.Member(T.dmatrix())
        self.qfilters = [theano.Member(T.dmatrix()) for i in xrange(n_quadratic_filters)]

        self.filter_shape = filter_shape
        self.step_size = step_size
        self.conv_mode = conv_mode

    def define_propup(self, container, input):
        """Store the hidden pre-activation and flattened hidden code on `container`.

        Also records self.hid_shape (second value returned by sp.convolve) and
        self.nl1feats (its product). b1 is not added here.
        """
        # Encode the `input` argument rather than self.input, so that the
        # noisy container is really built from the corrupted input.
        lin_hid_activ, self.hid_shape = sp.convolve(self.w1, self.filter_shape,
                self.n_hid_per_pixel, input, self.img_shape, self.step_size,
                self.conv_mode, flatten=False)
        self.nl1feats = N.prod(self.hid_shape)

        # apply quadratic filters
        qsum = 0
        for qf in self.qfilters:
            temp, hidshape = sp.convolve(qf, self.filter_shape, self.n_hid_per_pixel,
                    input, self.img_shape, self.step_size, self.conv_mode, flatten=False)
            qsum = qsum + temp**2

        # Skip the addition entirely when there are no quadratic filters.
        if self.n_quadratic_filters:
            hid_activ = lin_hid_activ + qsum
        else:
            hid_activ = lin_hid_activ

        container.hidden_activation = hid_activ
        container.hidden = T.flatten(self.hid_fn(container.hidden_activation), 2)

    def define_propdown(self, container):
        """Deliberately define nothing for the decoder.

        NOTE(review): container.output is therefore never set by this class,
        while DAA.define_cost reads it -- confirm how the reconstruction is
        meant to be built.
        """
        pass

    def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
        """Initialise `obj` with weights shaped for the convolutional encoder.

        Reads self.hid_shape, which is only set by define_propup, so the
        behavioural graph must have been defined before this runs.
        """
        # Base initialisation without allocating the dense weights.
        DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init)

        # override weight initialization: one row of w1 per hidden unit per
        # pixel, one column per filter element
        w1shp = (self.n_hid_per_pixel, N.prod(self.filter_shape))
        w2shp = (N.prod(self.hid_shape), self.input_size)
        obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf)
        obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
        obj.b1 = N.zeros((self.n_hid_per_pixel,1))
        obj.b2 = N.zeros(w2shp[1])

        QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init)
349 | |
350 | |
351 ############################################## | |
352 # TEST CODE | |
353 ############################################## | |
354 def debug(): | |
355 img_shape = (3,3) | |
356 n_hid_per_pixel = 1 | |
357 filter_shape = (2,2) | |
358 step_size = (1,1) | |
359 conv_mode = 'full' | |
360 batch_size = 10 | |
361 | |
362 R = N.random.RandomState(100) | |
363 data = R.random_integers(0, 1, size=(batch_size, N.prod(img_shape))) | |
364 | |
365 print 'Instantiating DAA...', | |
366 daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size) | |
367 daa_model.init_behavioural() | |
368 daa = daa_model.make(lr=0.1) | |
369 daa.update(data) | |
370 print 'done' | |
371 | |
372 print 'Instantiating QuadraticDAA...', | |
373 qdaa_model = QuadraticDAA(img_shape, n_hid_per_pixel, | |
374 n_quadratic_filters=1, batch_size=batch_size) | |
375 qdaa_model.init_behavioural() | |
376 qdaa = qdaa_model.make(lr=0.1) | |
377 qdaa.update(data) | |
378 print 'done' | |
379 | |
380 print 'Instantiating SparseQuadraticDAA...', | |
381 sp_qdaa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel, | |
382 filter_shape, step_size, conv_mode, | |
383 n_quadratic_filters=1, batch_size=batch_size) | |
384 sp_qdaa_model.init_behavioural() | |
385 sp_qdaa = sp_qdaa_model.make(lr=0.1) | |
386 sp_qdaa.representation(data) | |
387 sp_qdaa.reconstruction(data) | |
388 sp_qdaa.update(data) | |
389 print 'done!' | |
390 | |
391 print 'Instantiating ConvQuadraticDAA...', | |
392 conv_qdaa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel, | |
393 filter_shape, step_size, conv_mode, | |
394 n_quadratic_filters=1, batch_size=batch_size) | |
395 conv_qdaa_model.init_behavioural() | |
396 conv_qdaa = conv_qdaa_model.make(lr=0.1) | |
397 conv_qdaa.representation(data) | |
398 conv_qdaa.reconstruction(data) | |
399 conv_qdaa.update(data) | |
400 print 'done!' | |
401 | |
402 def test(): | |
403 | |
404 from pylearn.datasets import MNIST | |
405 from pylearn.datasets import make_dataset | |
406 import pylab as pl | |
407 | |
408 def showimg(x,y): | |
409 for i in range(batch_size): | |
410 pl.subplot(2,batch_size,i+1); pl.gray(); pl.axis('off'); | |
411 pl.imshow(x[i,:].reshape(img_shape)) | |
412 pl.subplot(2,batch_size,batch_size+i+1); pl.gray(); pl.axis('off'); | |
413 pl.imshow(y[i,:].reshape(img_shape)) | |
414 pl.show() | |
415 | |
416 img_shape = (28,28) | |
417 n_hid_per_pixel = 1 | |
418 n_quadratic_filters = 0 | |
419 batch_size = 4 | |
420 epochs = 50 | |
421 lr = .01 | |
422 filter_shape = (5,5) | |
423 step_size = (2,2) | |
424 conv_mode = 'valid' | |
425 | |
426 dataset = make_dataset('MNIST',variant='1k') | |
427 | |
428 #print 'Instantiating DAA...', | |
429 #daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size, regularize=False) | |
430 #daa_model.init_behavioural() | |
431 #daa = daa_model.make(lr=lr, mode='FAST_RUN') | |
432 #print 'done' | |
433 | |
434 #print 'Instantiating QuadraticDAA...', | |
435 #daa_model = QuadraticDAA(img_shape, n_hid_per_pixel, | |
436 #n_quadratic_filters=n_quadratic_filters, batch_size=batch_size) | |
437 #daa_model.init_behavioural() | |
438 #daa = daa_model.make(lr=0.1, mode='FAST_RUN') | |
439 | |
440 print 'Instantiating SparseQuadraticDAA...', | |
441 daa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel, | |
442 filter_shape, step_size, conv_mode, | |
443 n_quadratic_filters=n_quadratic_filters, batch_size=batch_size) | |
444 daa_model.init_behavioural() | |
445 daa = daa_model.make(lr=0.1, mode='FAST_RUN') | |
446 | |
447 #print 'Instantiating ConvQuadraticDAA...', | |
448 #daa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel, | |
449 #filter_shape, step_size, conv_mode, | |
450 #n_quadratic_filters=1, batch_size=batch_size) | |
451 #daa_model.init_behavioural() | |
452 #daa = daa_model.make(lr=0.1, mode='FAST_RUN') | |
453 | |
454 for ep in range(epochs): | |
455 print '********** Epoch %i *********' % ep | |
456 imgi=0 | |
457 for i in range(dataset.train.x.shape[0]/batch_size): | |
458 x = dataset.train.x[imgi:imgi+batch_size,:] | |
459 print daa.update(x) | |
460 imgi += batch_size | |
461 | |
462 if (ep+1) % 1 == 0: | |
463 starti = N.floor(N.random.rand()*(1000-4)) | |
464 x = dataset.train.x[starti:starti+batch_size,:] | |
465 x_rec = daa.reconstruction(x) | |
466 showimg(x,x_rec) | |
467 | |
if __name__ == '__main__':
    # Running the module as a script launches the MNIST training demo.
    test()