pylearn: pylearn/algorithms/daa.py comparison

comparison pylearn/algorithms/daa.py @ 633:e242c12eb30d

merged

author	desjagui@atchoum.iro.umontreal.ca
date	Wed, 21 Jan 2009 03:23:50 -0500
parents	b054271b2504
children	89bc88affef0

comparison

equal deleted inserted replaced

-:a11b7fbf3171
+:e242c12eb30d
 import theano
 from theano import tensor as T
 from theano.tensor import nnet as NN
+from hpu.conv import sp
 import numpy as N
+from theano.printing import Print
-from pylearn import cost as cost
+from pylearn.algorithms import cost
-class DenoisingAA(T.RModule):
+# TODO: make this more generic (somewhere in pylearn)
+def lnorm(param, type='L2'):
+if type == 'L1':
+return T.sum(T.abs(param))
+if type == 'L2':
+return T.sum(param*param)
+raise NotImplementedError('Only L1 and L2 regularization are currently implemented')
+def get_reg_cost(params, type):
+rcost = 0
+for param in params:
+rcost += lnorm(param, type)
+return rcost
+class ScratchPad:
+pass
+class DAA(T.RModule):
 """De-noising Auto-encoder
-WRITEME
-Abstract base class. Requires subclass with functions:
-- build_corrupted_input()
-Introductory article about this model WRITEME.
 """
-def __init__(self, input = None, regularize = True, tie_weights = True,
+# TODO: change n_hid_per_pixel to nkern
-activation_function=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy):
+def __init__(self, img_shape, n_hid_per_pixel,
+batch_size=4, regularize = True, tie_weights = False,
+hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init):
 """
-:param input: WRITEME
 :param regularize: WRITEME
 :param tie_weights: WRITEME
+:param hid_fn: WRITEME
-:param activation_function: WRITEME
 :param reconstruction_cost: Should return one cost per example (row)
 :todo: Default noise level for all daa levels
 """
-super(DenoisingAA, self).__init__()
+super(DAA, self).__init__()
 # MODEL CONFIGURATION
+self.img_shape = img_shape
+self.input_size = N.prod(img_shape)
+self.n_hid_per_pixel = n_hid_per_pixel
+self.batch_size = batch_size
 self.regularize = regularize
 self.tie_weights = tie_weights
-self.activation_function = activation_function
+self.hid_fn = hid_fn
 self.reconstruction_cost_function = reconstruction_cost_function
-# ACQUIRE/MAKE INPUT
+### DECLARE MODEL VARIABLES
-if not input:
+self.input = theano.External(T.dmatrix('input'))
-input = T.matrix('input')
-self.input = theano.External(input)
+#parameters
+self.w1 = theano.Member(T.dmatrix())
-# HYPER-PARAMETERS
+self.w2 = self.w1.T if tie_weights else theano.Member(T.dmatrix())
+self.b1 = theano.Member(T.dvector())
+self.b2 = theano.Member(T.dvector())
+#hyper-parameters
 self.lr = theano.Member(T.scalar())
-# PARAMETERS
-self.w1 = theano.Member(T.matrix())
+### BEHAVIOURAL MODEL
-if not tie_weights:
+def init_behavioural(self):
-self.w2 = theano.Member(T.matrix())
+self.noisy_input = self.corrupt_input()
+self.noise = ScratchPad()
+self.clean = ScratchPad()
+self.define_behavioural(self.clean, self.input)
+self.define_behavioural(self.noise, self.noisy_input)
+self.define_regularization()  # call before cost
+self.define_cost(self.clean)
+self.define_cost(self.noise)
+self.define_gradients()
+self.define_interface()
+def define_behavioural(self,container, input):
+self.define_propup(container, input)
+self.define_propdown(container)
+def define_propup(self, container, input):
+container.hidden_activation = T.dot(input, self.w1) + self.b1
+container.hidden = self.hid_fn(container.hidden_activation)
+# DEPENDENCY: define_propup
+def define_propdown(self, container):
+container.output_activation = T.dot(container.hidden, self.w2) + self.b2
+container.output = self.hid_fn(container.output_activation)
+# TODO: fix regularization type (outside parameter ?)
+def define_regularization(self, regtype=None):
+if regtype == None:
+self.regularization = T.zero() # base model has no regularization!
+return
+self.reg_coef = theano.Member(T.scalar())
+self.regularization = self.reg_coef * get_reg_cost([self.w1,self.w2], 'L2')
+# DEPENDENCY: define_behavioural, define_regularization
+def define_cost(self, container):
+container.reconstruction_cost = self.reconstruction_costs(container.output)
+# TOTAL COST
+container.cost = container.reconstruction_cost
+if self.regularize:
+container.cost = container.cost + self.regularization
+# DEPENDENCY: define_cost
+def define_gradients(self):
+if not hasattr(self,'params'):
+self.params = []
+if self.tie_weights:
+self.params += [self.w1, self.b1, self.b2]
 else:
-self.w2 = self.w1.T
+self.params += [self.w1, self.w2, self.b1, self.b2]
-self.b1 = theano.Member(T.vector())
-self.b2 = theano.Member(T.vector())
+self.gradients = T.grad(self.noise.cost, self.params)
+self.updates = dict((p, p - self.lr * g) for p, g in \
+zip(self.params, self.gradients))
-# REGULARIZATION COST
-self.regularization = self.build_regularization()
+# DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients
+def define_interface(self):
-### NOISELESS ###
+self.update = theano.Method(self.input, self.noise.cost, self.updates)
+self.compute_cost = theano.Method(self.input, self.clean.cost)
-# HIDDEN LAYER
+self.noisify = theano.Method(self.input, self.noisy_input)
-self.hidden_activation = T.dot(self.input, self.w1) + self.b1
+self.reconstruction = theano.Method(self.input, self.clean.output)
-self.hidden = self.hid_activation_function(self.hidden_activation)
+self.representation = theano.Method(self.input, self.clean.hidden)
+self.reconstruction_through_noise = theano.Method(self.input,\
-# RECONSTRUCTION LAYER
+[self.noisy_input, self.noise.output])
-self.output_activation = T.dot(self.hidden, self.w2) + self.b2
+self.validate = theano.Method(self.input, [self.clean.cost, self.clean.output])
-self.output = self.out_activation_function(self.output_activation)
-# RECONSTRUCTION COST
+def corrupt_input(self):
-self.reconstruction_costs = self.build_reconstruction_costs(self.output)
+self.noise_level = theano.Member(T.scalar())
-self.reconstruction_cost = T.mean(self.reconstruction_costs)
+return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
-# TOTAL COST
+# what about filter_scale ?
-self.cost = self.reconstruction_cost
+def _instance_initialize(self, obj, lr=None, seed=1, alloc=True, **init):
-if self.regularize:
-self.cost = self.cost + self.regularization
+init.setdefault('reg_coef', 0)
+init.setdefault('noise_level', 0)
+obj.lr = lr
-### WITH NOISE ###
-self.corrupted_input = self.build_corrupted_input()
+super(DAA, self)._instance_initialize(obj, **init)
-# HIDDEN LAYER
+self.R = N.random.RandomState(seed) if seed is not None else N.random
-self.nhidden_activation = T.dot(self.corrupted_input, self.w1) + self.b1
-self.nhidden = self.hid_activation_function(self.nhidden_activation)
-# RECONSTRUCTION LAYER
-self.noutput_activation = T.dot(self.nhidden, self.w2) + self.b2
-self.noutput = self.out_activation_function(self.noutput_activation)
-# RECONSTRUCTION COST
-self.nreconstruction_costs = self.build_reconstruction_costs(self.noutput)
-self.nreconstruction_cost = T.mean(self.nreconstruction_costs)
-# TOTAL COST
-self.ncost = self.nreconstruction_cost
-if self.regularize:
-self.ncost = self.ncost + self.regularization
-# GRADIENTS AND UPDATES
-if self.tie_weights:
-self.params = self.w1, self.b1, self.b2
-else:
-self.params = self.w1, self.w2, self.b1, self.b2
-gradients = T.grad(self.ncost, self.params)
-updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
-# INTERFACE METHODS
-self.update = theano.Method(self.input, self.ncost, updates)
-self.compute_cost = theano.Method(self.input, self.cost)
-self.noisify = theano.Method(self.input, self.corrupted_input)
-self.reconstruction = theano.Method(self.input, self.output)
-self.representation = theano.Method(self.input, self.hidden)
-self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput])
-self.validate = theano.Method(self.input, [self.cost, self.output])
-def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
-if (input_size is None) ^ (hidden_size is None):
-raise ValueError("Must specify input_size and hidden_size or neither.")
-super(DenoisingAA, self)._instance_initialize(obj, **init)
-if seed is not None:
-R = N.random.RandomState(seed)
-else:
-R = N.random
-if input_size is not None:
-sz = (input_size, hidden_size)
-inf = 1/N.sqrt(input_size)
-hif = 1/N.sqrt(hidden_size)
-obj.w1 = R.uniform(size = sz, low = -inf, high = inf)
-if not self.tie_weights:
-obj.w2 = R.uniform(size = list(reversed(sz)), low = -hif, high = hif)
-obj.b1 = N.zeros(hidden_size)
-obj.b2 = N.zeros(input_size)
 if seed is not None:
 obj.seed(seed)
 obj.__hide__ = ['params']
-def build_regularization(self):
+self.inf = 1/N.sqrt(self.input_size)
-"""
+self.hif = 1/N.sqrt(self.n_hid_per_pixel)
-@todo: Why do we need this function?
-"""
+if alloc:
-return T.zero() # no regularization!
+w1shp = (self.input_size, self.n_hid_per_pixel)
+w2shp = list(reversed(w1shp))
-class SigmoidXEDenoisingAA(DenoisingAA):
+obj.w1 = self.R.uniform(size=w1shp, low = -self.inf, high = self.inf)
-"""
+if not self.tie_weights:
-@todo: Merge this into the above.
+obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
-@todo: Default noise level for all daa levels
-"""
+obj.b1 = N.zeros(self.n_hid_per_pixel)
+obj.b2 = N.zeros(self.input_size)
-def build_corrupted_input(self):
-self.noise_level = theano.Member(T.scalar())
-return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
+#TODO: these should be made generic
-def hid_activation_function(self, activation):
+############## HELPER FUNCTIONS #####################
-return self.activation_function(activation)
+def reconstruction_costs(self, output):
-def out_activation_function(self, activation):
-return self.activation_function(activation)
-def build_reconstruction_costs(self, output):
 return self.reconstruction_cost_function(self.input, output)
-def build_regularization(self):
-self.l2_coef = theano.Member(T.scalar())
+##############################################
-if self.tie_weights:
+#              QUADRATIC DAA                 #
-return self.l2_coef * T.sum(self.w1 * self.w1)
+##############################################
+class QuadraticDAA(DAA):
+def __init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters=0,
+batch_size=4, regularize = True, hid_fn=NN.sigmoid,
+reconstruction_cost_function=cost.cross_entropy, **init):
+# set tied-weights to False for QDAAs
+super(QuadraticDAA, self).__init__(img_shape, n_hid_per_pixel, batch_size, regularize,
+False, hid_fn, reconstruction_cost_function, **init)
+self.n_quadratic_filters = n_quadratic_filters
+self.qfilters = [theano.Member(T.dmatrix()) \
+for i in xrange(n_quadratic_filters)]
+# TODO: verify with James that the formula is correct (without sqrt)
+def define_propup(self, container, input):
+if self.n_quadratic_filters:
+qsum = 0
+for qf in self.qfilters:
+qsum = qsum + T.dot(input, qf)**2
+container.hidden_activation = T.dot(input, self.w1) + self.b1 + qsum
 else:
-return self.l2_coef * (T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2))
+container.hidden_activation = T.dot(input, self.w1) + self.b1
+container.hidden = self.hid_fn(container.hidden_activation)
-def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
-init.setdefault('noise_level', 0)
+def define_gradients(self):
-init.setdefault('l2_coef', 0)
+self.params = self.qfilters
-super(SigmoidXEDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, **init)
+DAA.define_gradients(self)
+def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
+# only call constructor of base-class if we are instantiating QuadraticDAA
+if self.__class__ == QuadraticDAA:
+super(QuadraticDAA, self)._instance_initialize(obj, lr, seed, **init)
+obj.qfilters = [self.R.uniform(size=obj.w1.shape, low=-self.inf, high=self.inf)*\
+qfilter_relscale for qf in self.qfilters]
+##############################################
+#        SPARSE QUADRATIC DAA                #
+##############################################
+class SparseQuadraticDAA(QuadraticDAA):
+def __init__(self, img_shape, n_hid_per_pixel,
+filter_shape, step_size=(1,1), conv_mode='valid',
+n_quadratic_filters=0, batch_size=4,
+regularize = True, hid_fn=NN.sigmoid,
+reconstruction_cost_function=cost.cross_entropy, **init):
+QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters,
+batch_size, regularize, hid_fn, reconstruction_cost_function, **init)
+# need to override parameters for sparse operations (vector instead of matrix)
+self.w1 = theano.Member(T.dvector())
+self.w2 = theano.Member(T.dmatrix())
+self.qfilters = [theano.Member(T.dvector()) for i in xrange(n_quadratic_filters)]
+self.filter_shape = filter_shape
+self.step_size = step_size
+self.conv_mode = conv_mode
+def define_propup(self, container, input):
+lin_hid_activ, self.hid_shape = sp.applySparseFilter(\
+self.w1, self.filter_shape, self.n_hid_per_pixel,
+self.input, self.img_shape, self.step_size, self.conv_mode)
+self.nl1feats = N.prod(self.hid_shape)
+# apply quadratic filters
+qsum = 0
+for qf in self.qfilters:
+temp, hidshape = sp.applySparseFilter(qf, self.filter_shape,\
+self.n_hid_per_pixel, self.input, self.img_shape,
+self.step_size, self.conv_mode)
+qsum = qsum + temp**2
+quad_hid_activ = qsum
+hid_activ = lin_hid_activ + quad_hid_activ if self.n_quadratic_filters \
+else lin_hid_activ
+container.hidden_activation = hid_activ
+container.hidden = self.hid_fn(container.hidden_activation)
+def define_propdown(self, container):
+pass
+def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
+# change weight shapes based on sparse weight matrix parameters
+DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init)
+# override weight initialization
+w1shp = N.prod(self.hid_shape)*N.prod(self.filter_shape)
+w2shp = (N.prod(self.hid_shape), self.input_size)
+obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf)
+obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
+obj.b1 = N.zeros(w1shp)
+obj.b2 = N.zeros(w2shp[1])
+QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init)
+##############################################
+#       CONVOLUTIONAL QUADRATIC DAA          #
+##############################################
+class ConvQuadraticDAA(QuadraticDAA):
+def __init__(self, img_shape, n_hid_per_pixel,
+filter_shape, step_size=(1,1), conv_mode='valid',
+n_quadratic_filters=0, batch_size=4,
+regularize = True, hid_fn=NN.sigmoid,
+reconstruction_cost_function=cost.cross_entropy, **init):
+QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters,
+batch_size, regularize, hid_fn, reconstruction_cost_function, **init)
+# need to override parameters for sparse operations (vector instead of matrix)
+self.w1 = theano.Member(T.dmatrix())
+self.w2 = theano.Member(T.dmatrix())
+self.b1 = theano.Member(T.dmatrix())
+self.qfilters = [theano.Member(T.dmatrix()) for i in xrange(n_quadratic_filters)]
+self.filter_shape = filter_shape
+self.step_size = step_size
+self.conv_mode = conv_mode
+def define_propup(self, container, input):
+lin_hid_activ, self.hid_shape = sp.convolve(self.w1, self.filter_shape,
+self.n_hid_per_pixel, self.input, self.img_shape, self.step_size,
+self.conv_mode, flatten=False)
+self.nl1feats = N.prod(self.hid_shape)
+# apply quadratic filters
+qsum = 0
+for qf in self.qfilters:
+temp, hidshape = sp.convolve(qf, self.filter_shape, self.n_hid_per_pixel,
+self.input, self.img_shape, self.step_size, self.conv_mode, flatten=False)
+qsum = qsum + temp**2
+quad_hid_activ = qsum
+hid_activ = lin_hid_activ + quad_hid_activ if self.n_quadratic_filters \
+else lin_hid_activ
+container.hidden_activation = hid_activ
+container.hidden = T.flatten(self.hid_fn(container.hidden_activation), 2)
+def define_propdown(self, container):
+pass
+def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
+# change weight shapes based on sparse weight matrix parameters
+DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init)
+# override weight initialization
+w1shp = (self.n_hid_per_pixel, N.prod(self.filter_shape))
+w2shp = (N.prod(self.hid_shape), self.input_size)
+obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf)
+obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
+obj.b1 = N.zeros((self.n_hid_per_pixel,1))
+obj.b2 = N.zeros(w2shp[1])
+QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init)
+##############################################
+#                 TEST CODE
+##############################################
+def debug():
+img_shape = (3,3)
+n_hid_per_pixel = 1
+filter_shape = (2,2)
+step_size = (1,1)
+conv_mode = 'full'
+batch_size = 10
+R = N.random.RandomState(100)
+data = R.random_integers(0, 1, size=(batch_size, N.prod(img_shape)))
+print 'Instantiating DAA...',
+daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size)
+daa_model.init_behavioural()
+daa = daa_model.make(lr=0.1)
+daa.update(data)
+print 'done'
+print 'Instantiating QuadraticDAA...',
+qdaa_model = QuadraticDAA(img_shape, n_hid_per_pixel,
+n_quadratic_filters=1, batch_size=batch_size)
+qdaa_model.init_behavioural()
+qdaa = qdaa_model.make(lr=0.1)
+qdaa.update(data)
+print 'done'
+print 'Instantiating SparseQuadraticDAA...',
+sp_qdaa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel,
+filter_shape, step_size, conv_mode,
+n_quadratic_filters=1, batch_size=batch_size)
+sp_qdaa_model.init_behavioural()
+sp_qdaa = sp_qdaa_model.make(lr=0.1)
+sp_qdaa.representation(data)
+sp_qdaa.reconstruction(data)
+sp_qdaa.update(data)
+print 'done!'
+print 'Instantiating ConvQuadraticDAA...',
+conv_qdaa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel,
+filter_shape, step_size, conv_mode,
+n_quadratic_filters=1, batch_size=batch_size)
+conv_qdaa_model.init_behavioural()
+conv_qdaa = conv_qdaa_model.make(lr=0.1)
+conv_qdaa.representation(data)
+conv_qdaa.reconstruction(data)
+conv_qdaa.update(data)
+print 'done!'
+def test():
+from pylearn.datasets import MNIST
+from pylearn.datasets import make_dataset
+import pylab as pl
+def showimg(x,y):
+for i in range(batch_size):
+pl.subplot(2,batch_size,i+1); pl.gray(); pl.axis('off');
+pl.imshow(x[i,:].reshape(img_shape))
+pl.subplot(2,batch_size,batch_size+i+1); pl.gray(); pl.axis('off');
+pl.imshow(y[i,:].reshape(img_shape))
+pl.show()
+img_shape = (28,28)
+n_hid_per_pixel = 1
+n_quadratic_filters = 0
+batch_size = 4
+epochs = 50
+lr = .01
+filter_shape = (5,5)
+step_size = (2,2)
+conv_mode = 'valid'
+dataset = make_dataset('MNIST',variant='1k')
+#print 'Instantiating DAA...',
+#daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size, regularize=False)
+#daa_model.init_behavioural()
+#daa = daa_model.make(lr=lr, mode='FAST_RUN')
+#print 'done'
+#print 'Instantiating QuadraticDAA...',
+#daa_model = QuadraticDAA(img_shape, n_hid_per_pixel,
+#n_quadratic_filters=n_quadratic_filters, batch_size=batch_size)
+#daa_model.init_behavioural()
+#daa = daa_model.make(lr=0.1, mode='FAST_RUN')
+print 'Instantiating SparseQuadraticDAA...',
+daa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel,
+filter_shape, step_size, conv_mode,
+n_quadratic_filters=n_quadratic_filters, batch_size=batch_size)
+daa_model.init_behavioural()
+daa = daa_model.make(lr=0.1, mode='FAST_RUN')
+#print 'Instantiating ConvQuadraticDAA...',
+#daa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel,
+#filter_shape, step_size, conv_mode,
+#n_quadratic_filters=1, batch_size=batch_size)
+#daa_model.init_behavioural()
+#daa = daa_model.make(lr=0.1, mode='FAST_RUN')
+for ep in range(epochs):
+print '********** Epoch %i *********' % ep
+imgi=0
+for i in range(dataset.train.x.shape[0]/batch_size):
+x = dataset.train.x[imgi:imgi+batch_size,:]
+print daa.update(x)
+imgi += batch_size
+if (ep+1) % 1 == 0:
+starti = N.floor(N.random.rand()*(1000-4))
+x = dataset.train.x[starti:starti+batch_size,:]
+x_rec = daa.reconstruction(x)
+showimg(x,x_rec)
+if __name__ == '__main__':
+test()

Mercurial > pylearn

comparison pylearn/algorithms/daa.py @ 633:e242c12eb30d