Mercurial > pylearn
view pylearn/algorithms/daa.py @ 633:e242c12eb30d
merged
author | desjagui@atchoum.iro.umontreal.ca |
---|---|
date | Wed, 21 Jan 2009 03:23:50 -0500 |
parents | b054271b2504 |
children | 89bc88affef0 |
line wrap: on
line source
import theano from theano import tensor as T from theano.tensor import nnet as NN from hpu.conv import sp import numpy as N from theano.printing import Print from pylearn.algorithms import cost # TODO: make this more generic (somewhere in pylearn) def lnorm(param, type='L2'): if type == 'L1': return T.sum(T.abs(param)) if type == 'L2': return T.sum(param*param) raise NotImplementedError('Only L1 and L2 regularization are currently implemented') def get_reg_cost(params, type): rcost = 0 for param in params: rcost += lnorm(param, type) return rcost class ScratchPad: pass class DAA(T.RModule): """De-noising Auto-encoder """ # TODO: change n_hid_per_pixel to nkern def __init__(self, img_shape, n_hid_per_pixel, batch_size=4, regularize = True, tie_weights = False, hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init): """ :param regularize: WRITEME :param tie_weights: WRITEME :param hid_fn: WRITEME :param reconstruction_cost: Should return one cost per example (row) :todo: Default noise level for all daa levels """ super(DAA, self).__init__() # MODEL CONFIGURATION self.img_shape = img_shape self.input_size = N.prod(img_shape) self.n_hid_per_pixel = n_hid_per_pixel self.batch_size = batch_size self.regularize = regularize self.tie_weights = tie_weights self.hid_fn = hid_fn self.reconstruction_cost_function = reconstruction_cost_function ### DECLARE MODEL VARIABLES self.input = theano.External(T.dmatrix('input')) #parameters self.w1 = theano.Member(T.dmatrix()) self.w2 = self.w1.T if tie_weights else theano.Member(T.dmatrix()) self.b1 = theano.Member(T.dvector()) self.b2 = theano.Member(T.dvector()) #hyper-parameters self.lr = theano.Member(T.scalar()) ### BEHAVIOURAL MODEL def init_behavioural(self): self.noisy_input = self.corrupt_input() self.noise = ScratchPad() self.clean = ScratchPad() self.define_behavioural(self.clean, self.input) self.define_behavioural(self.noise, self.noisy_input) self.define_regularization() # call before cost self.define_cost(self.clean) self.define_cost(self.noise) self.define_gradients() self.define_interface() def define_behavioural(self,container, input): self.define_propup(container, input) self.define_propdown(container) def define_propup(self, container, input): container.hidden_activation = T.dot(input, self.w1) + self.b1 container.hidden = self.hid_fn(container.hidden_activation) # DEPENDENCY: define_propup def define_propdown(self, container): container.output_activation = T.dot(container.hidden, self.w2) + self.b2 container.output = self.hid_fn(container.output_activation) # TODO: fix regularization type (outside parameter ?) def define_regularization(self, regtype=None): if regtype == None: self.regularization = T.zero() # base model has no regularization! return self.reg_coef = theano.Member(T.scalar()) self.regularization = self.reg_coef * get_reg_cost([self.w1,self.w2], 'L2') # DEPENDENCY: define_behavioural, define_regularization def define_cost(self, container): container.reconstruction_cost = self.reconstruction_costs(container.output) # TOTAL COST container.cost = container.reconstruction_cost if self.regularize: container.cost = container.cost + self.regularization # DEPENDENCY: define_cost def define_gradients(self): if not hasattr(self,'params'): self.params = [] if self.tie_weights: self.params += [self.w1, self.b1, self.b2] else: self.params += [self.w1, self.w2, self.b1, self.b2] self.gradients = T.grad(self.noise.cost, self.params) self.updates = dict((p, p - self.lr * g) for p, g in \ zip(self.params, self.gradients)) # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients def define_interface(self): self.update = theano.Method(self.input, self.noise.cost, self.updates) self.compute_cost = theano.Method(self.input, self.clean.cost) self.noisify = theano.Method(self.input, self.noisy_input) self.reconstruction = theano.Method(self.input, self.clean.output) self.representation = theano.Method(self.input, self.clean.hidden) self.reconstruction_through_noise = theano.Method(self.input,\ [self.noisy_input, self.noise.output]) self.validate = theano.Method(self.input, [self.clean.cost, self.clean.output]) def corrupt_input(self): self.noise_level = theano.Member(T.scalar()) return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input # what about filter_scale ? def _instance_initialize(self, obj, lr=None, seed=1, alloc=True, **init): init.setdefault('reg_coef', 0) init.setdefault('noise_level', 0) obj.lr = lr super(DAA, self)._instance_initialize(obj, **init) self.R = N.random.RandomState(seed) if seed is not None else N.random if seed is not None: obj.seed(seed) obj.__hide__ = ['params'] self.inf = 1/N.sqrt(self.input_size) self.hif = 1/N.sqrt(self.n_hid_per_pixel) if alloc: w1shp = (self.input_size, self.n_hid_per_pixel) w2shp = list(reversed(w1shp)) obj.w1 = self.R.uniform(size=w1shp, low = -self.inf, high = self.inf) if not self.tie_weights: obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif) obj.b1 = N.zeros(self.n_hid_per_pixel) obj.b2 = N.zeros(self.input_size) #TODO: these should be made generic ############## HELPER FUNCTIONS ##################### def reconstruction_costs(self, output): return self.reconstruction_cost_function(self.input, output) ############################################## # QUADRATIC DAA # ############################################## class QuadraticDAA(DAA): def __init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters=0, batch_size=4, regularize = True, hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init): # set tied-weights to False for QDAAs super(QuadraticDAA, self).__init__(img_shape, n_hid_per_pixel, batch_size, regularize, False, hid_fn, reconstruction_cost_function, **init) self.n_quadratic_filters = n_quadratic_filters self.qfilters = [theano.Member(T.dmatrix()) \ for i in xrange(n_quadratic_filters)] # TODO: verify with James that the formula is correct (without sqrt) def define_propup(self, container, input): if self.n_quadratic_filters: qsum = 0 for qf in self.qfilters: qsum = qsum + T.dot(input, qf)**2 container.hidden_activation = T.dot(input, self.w1) + self.b1 + qsum else: container.hidden_activation = T.dot(input, self.w1) + self.b1 container.hidden = self.hid_fn(container.hidden_activation) def define_gradients(self): self.params = self.qfilters DAA.define_gradients(self) def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init): # only call constructor of base-class if we are instantiating QuadraticDAA if self.__class__ == QuadraticDAA: super(QuadraticDAA, self)._instance_initialize(obj, lr, seed, **init) obj.qfilters = [self.R.uniform(size=obj.w1.shape, low=-self.inf, high=self.inf)*\ qfilter_relscale for qf in self.qfilters] ############################################## # SPARSE QUADRATIC DAA # ############################################## class SparseQuadraticDAA(QuadraticDAA): def __init__(self, img_shape, n_hid_per_pixel, filter_shape, step_size=(1,1), conv_mode='valid', n_quadratic_filters=0, batch_size=4, regularize = True, hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init): QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters, batch_size, regularize, hid_fn, reconstruction_cost_function, **init) # need to override parameters for sparse operations (vector instead of matrix) self.w1 = theano.Member(T.dvector()) self.w2 = theano.Member(T.dmatrix()) self.qfilters = [theano.Member(T.dvector()) for i in xrange(n_quadratic_filters)] self.filter_shape = filter_shape self.step_size = step_size self.conv_mode = conv_mode def define_propup(self, container, input): lin_hid_activ, self.hid_shape = sp.applySparseFilter(\ self.w1, self.filter_shape, self.n_hid_per_pixel, self.input, self.img_shape, self.step_size, self.conv_mode) self.nl1feats = N.prod(self.hid_shape) # apply quadratic filters qsum = 0 for qf in self.qfilters: temp, hidshape = sp.applySparseFilter(qf, self.filter_shape,\ self.n_hid_per_pixel, self.input, self.img_shape, self.step_size, self.conv_mode) qsum = qsum + temp**2 quad_hid_activ = qsum hid_activ = lin_hid_activ + quad_hid_activ if self.n_quadratic_filters \ else lin_hid_activ container.hidden_activation = hid_activ container.hidden = self.hid_fn(container.hidden_activation) def define_propdown(self, container): pass def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init): # change weight shapes based on sparse weight matrix parameters DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init) # override weight initialization w1shp = N.prod(self.hid_shape)*N.prod(self.filter_shape) w2shp = (N.prod(self.hid_shape), self.input_size) obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf) obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif) obj.b1 = N.zeros(w1shp) obj.b2 = N.zeros(w2shp[1]) QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init) ############################################## # CONVOLUTIONAL QUADRATIC DAA # ############################################## class ConvQuadraticDAA(QuadraticDAA): def __init__(self, img_shape, n_hid_per_pixel, filter_shape, step_size=(1,1), conv_mode='valid', n_quadratic_filters=0, batch_size=4, regularize = True, hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init): QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters, batch_size, regularize, hid_fn, reconstruction_cost_function, **init) # need to override parameters for sparse operations (vector instead of matrix) self.w1 = theano.Member(T.dmatrix()) self.w2 = theano.Member(T.dmatrix()) self.b1 = theano.Member(T.dmatrix()) self.qfilters = [theano.Member(T.dmatrix()) for i in xrange(n_quadratic_filters)] self.filter_shape = filter_shape self.step_size = step_size self.conv_mode = conv_mode def define_propup(self, container, input): lin_hid_activ, self.hid_shape = sp.convolve(self.w1, self.filter_shape, self.n_hid_per_pixel, self.input, self.img_shape, self.step_size, self.conv_mode, flatten=False) self.nl1feats = N.prod(self.hid_shape) # apply quadratic filters qsum = 0 for qf in self.qfilters: temp, hidshape = sp.convolve(qf, self.filter_shape, self.n_hid_per_pixel, self.input, self.img_shape, self.step_size, self.conv_mode, flatten=False) qsum = qsum + temp**2 quad_hid_activ = qsum hid_activ = lin_hid_activ + quad_hid_activ if self.n_quadratic_filters \ else lin_hid_activ container.hidden_activation = hid_activ container.hidden = T.flatten(self.hid_fn(container.hidden_activation), 2) def define_propdown(self, container): pass def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init): # change weight shapes based on sparse weight matrix parameters DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init) # override weight initialization w1shp = (self.n_hid_per_pixel, N.prod(self.filter_shape)) w2shp = (N.prod(self.hid_shape), self.input_size) obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf) obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif) obj.b1 = N.zeros((self.n_hid_per_pixel,1)) obj.b2 = N.zeros(w2shp[1]) QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init) ############################################## # TEST CODE ############################################## def debug(): img_shape = (3,3) n_hid_per_pixel = 1 filter_shape = (2,2) step_size = (1,1) conv_mode = 'full' batch_size = 10 R = N.random.RandomState(100) data = R.random_integers(0, 1, size=(batch_size, N.prod(img_shape))) print 'Instantiating DAA...', daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size) daa_model.init_behavioural() daa = daa_model.make(lr=0.1) daa.update(data) print 'done' print 'Instantiating QuadraticDAA...', qdaa_model = QuadraticDAA(img_shape, n_hid_per_pixel, n_quadratic_filters=1, batch_size=batch_size) qdaa_model.init_behavioural() qdaa = qdaa_model.make(lr=0.1) qdaa.update(data) print 'done' print 'Instantiating SparseQuadraticDAA...', sp_qdaa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel, filter_shape, step_size, conv_mode, n_quadratic_filters=1, batch_size=batch_size) sp_qdaa_model.init_behavioural() sp_qdaa = sp_qdaa_model.make(lr=0.1) sp_qdaa.representation(data) sp_qdaa.reconstruction(data) sp_qdaa.update(data) print 'done!' print 'Instantiating ConvQuadraticDAA...', conv_qdaa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel, filter_shape, step_size, conv_mode, n_quadratic_filters=1, batch_size=batch_size) conv_qdaa_model.init_behavioural() conv_qdaa = conv_qdaa_model.make(lr=0.1) conv_qdaa.representation(data) conv_qdaa.reconstruction(data) conv_qdaa.update(data) print 'done!' def test(): from pylearn.datasets import MNIST from pylearn.datasets import make_dataset import pylab as pl def showimg(x,y): for i in range(batch_size): pl.subplot(2,batch_size,i+1); pl.gray(); pl.axis('off'); pl.imshow(x[i,:].reshape(img_shape)) pl.subplot(2,batch_size,batch_size+i+1); pl.gray(); pl.axis('off'); pl.imshow(y[i,:].reshape(img_shape)) pl.show() img_shape = (28,28) n_hid_per_pixel = 1 n_quadratic_filters = 0 batch_size = 4 epochs = 50 lr = .01 filter_shape = (5,5) step_size = (2,2) conv_mode = 'valid' dataset = make_dataset('MNIST',variant='1k') #print 'Instantiating DAA...', #daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size, regularize=False) #daa_model.init_behavioural() #daa = daa_model.make(lr=lr, mode='FAST_RUN') #print 'done' #print 'Instantiating QuadraticDAA...', #daa_model = QuadraticDAA(img_shape, n_hid_per_pixel, #n_quadratic_filters=n_quadratic_filters, batch_size=batch_size) #daa_model.init_behavioural() #daa = daa_model.make(lr=0.1, mode='FAST_RUN') print 'Instantiating SparseQuadraticDAA...', daa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel, filter_shape, step_size, conv_mode, n_quadratic_filters=n_quadratic_filters, batch_size=batch_size) daa_model.init_behavioural() daa = daa_model.make(lr=0.1, mode='FAST_RUN') #print 'Instantiating ConvQuadraticDAA...', #daa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel, #filter_shape, step_size, conv_mode, #n_quadratic_filters=1, batch_size=batch_size) #daa_model.init_behavioural() #daa = daa_model.make(lr=0.1, mode='FAST_RUN') for ep in range(epochs): print '********** Epoch %i *********' % ep imgi=0 for i in range(dataset.train.x.shape[0]/batch_size): x = dataset.train.x[imgi:imgi+batch_size,:] print daa.update(x) imgi += batch_size if (ep+1) % 1 == 0: starti = N.floor(N.random.rand()*(1000-4)) x = dataset.train.x[starti:starti+batch_size,:] x_rec = daa.reconstruction(x) showimg(x,x_rec) if __name__ == '__main__': test()