view pylearn/algorithms/daa.py @ 633:e242c12eb30d

merged
author desjagui@atchoum.iro.umontreal.ca
date Wed, 21 Jan 2009 03:23:50 -0500
parents b054271b2504
children 89bc88affef0

import theano
from theano import tensor as T
from theano.tensor import nnet as NN
from hpu.conv import sp
import numpy as N
from theano.printing import Print

from pylearn.algorithms import cost

# TODO: make this more generic (somewhere in pylearn)
def lnorm(param, type='L2'):
    if type == 'L1':
        return T.sum(T.abs_(param))
    if type == 'L2':
        return T.sum(param*param)
    raise NotImplementedError('Only L1 and L2 regularization are currently implemented')

def get_reg_cost(params, type):
    rcost = 0
    for param in params:
        rcost += lnorm(param, type)
    return rcost
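# Illustrative check of the penalties above (my arithmetic, not part of the
# original module): for a single parameter with value w = [[1., -2.]],
#   lnorm(w, 'L1')  evaluates to |1| + |-2|      = 3
#   lnorm(w, 'L2')  evaluates to 1**2 + (-2)**2  = 5
# and get_reg_cost simply sums one such symbolic term per parameter.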

class ScratchPad:
    pass

class DAA(T.RModule):
    """De-noising Auto-encoder
    """

    # TODO: change n_hid_per_pixel to nkern
    def __init__(self, img_shape, n_hid_per_pixel,
                 batch_size=4, regularize = True, tie_weights = False,
                 hid_fn=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy, **init):
        """
        :param regularize: WRITEME
        :param tie_weights: WRITEME
        :param hid_fn: WRITEME
        :param reconstruction_cost: Should return one cost per example (row)
        :todo: Default noise level for all daa levels

        """
        super(DAA, self).__init__()

        # MODEL CONFIGURATION
        self.img_shape = img_shape
        self.input_size = N.prod(img_shape)
        self.n_hid_per_pixel = n_hid_per_pixel
        self.batch_size = batch_size
        self.regularize = regularize
        self.tie_weights = tie_weights
        self.hid_fn = hid_fn
        self.reconstruction_cost_function = reconstruction_cost_function

        ### DECLARE MODEL VARIABLES
        self.input = theano.External(T.dmatrix('input'))
        
        #parameters
        self.w1 = theano.Member(T.dmatrix())
        self.w2 = self.w1.T if tie_weights else theano.Member(T.dmatrix())
        self.b1 = theano.Member(T.dvector())
        self.b2 = theano.Member(T.dvector())
       
        #hyper-parameters
        self.lr = theano.Member(T.scalar())


    ### BEHAVIOURAL MODEL
    def init_behavioural(self):
        self.noisy_input = self.corrupt_input()
        self.noise = ScratchPad()
        self.clean = ScratchPad()
        self.define_behavioural(self.clean, self.input)
        self.define_behavioural(self.noise, self.noisy_input)
        self.define_regularization()  # call before cost
        self.define_cost(self.clean)
        self.define_cost(self.noise)
        self.define_gradients()
        self.define_interface()


    def define_behavioural(self,container, input):
        self.define_propup(container, input)
        self.define_propdown(container)

    def define_propup(self, container, input):
        container.hidden_activation = T.dot(input, self.w1) + self.b1
        container.hidden = self.hid_fn(container.hidden_activation)

    # DEPENDENCY: define_propup
    def define_propdown(self, container):
        container.output_activation = T.dot(container.hidden, self.w2) + self.b2
        container.output = self.hid_fn(container.output_activation)
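
    # Taken together, propup/propdown implement the usual autoencoder pair
    #   h    = s(x W1 + b1)        (encoder)
    #   xhat = s(h W2 + b2)        (decoder; W2 = W1.T when tie_weights)
    # where s is hid_fn (sigmoid by default) and x holds one example per row.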


    # TODO: fix regularization type (outside parameter ?)
    def define_regularization(self, regtype=None):
        if regtype is None:
            self.regularization = T.zero() # base model has no regularization!
            return
        self.reg_coef = theano.Member(T.scalar())
        self.regularization = self.reg_coef * get_reg_cost([self.w1,self.w2], 'L2')
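
    # Note: init_behavioural calls define_regularization() with no argument,
    # so the base model contributes a zero penalty; reg_coef only takes
    # effect if a caller re-invokes this with e.g. regtype='L2'.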


    # DEPENDENCY: define_behavioural, define_regularization
    def define_cost(self, container):
        container.reconstruction_cost = self.reconstruction_costs(container.output)
        # TOTAL COST
        container.cost = container.reconstruction_cost
        if self.regularize:
            container.cost = container.cost + self.regularization


    # DEPENDENCY: define_cost
    def define_gradients(self):
        if not hasattr(self,'params'):
            self.params = []
        if self.tie_weights:
            self.params += [self.w1, self.b1, self.b2]
        else:
            self.params += [self.w1, self.w2, self.b1, self.b2]

        self.gradients = T.grad(self.noise.cost, self.params)
        self.updates = dict((p, p - self.lr * g) for p, g in \
                zip(self.params, self.gradients))
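
    # The resulting updates dictionary implements one step of stochastic
    # gradient descent on the *noisy* reconstruction cost:
    #   p <- p - lr * d(noise.cost)/dp     for every parameter p.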


    # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients
    def define_interface(self):
        self.update = theano.Method(self.input, self.noise.cost, self.updates)
        self.compute_cost = theano.Method(self.input, self.clean.cost)
        self.noisify = theano.Method(self.input, self.noisy_input)
        self.reconstruction = theano.Method(self.input, self.clean.output)
        self.representation = theano.Method(self.input, self.clean.hidden)
        self.reconstruction_through_noise = theano.Method(self.input,\
                [self.noisy_input, self.noise.output])
        self.validate = theano.Method(self.input, [self.clean.cost, self.clean.output])
    

    def corrupt_input(self):
        self.noise_level = theano.Member(T.scalar())
        return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
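
    # corrupt_input applies masking noise: each input component is kept with
    # probability 1 - noise_level and zeroed otherwise, so e.g. with
    # noise_level = 0.25 roughly a quarter of each example is erased.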

    # what about filter_scale ?
    def _instance_initialize(self, obj, lr=None, seed=1, alloc=True, **init):
       
        init.setdefault('reg_coef', 0)
        init.setdefault('noise_level', 0)
        obj.lr = lr

        super(DAA, self)._instance_initialize(obj, **init)
        
        self.R = N.random.RandomState(seed) if seed is not None else N.random
        if seed is not None:
            obj.seed(seed)
        obj.__hide__ = ['params']

        self.inf = 1/N.sqrt(self.input_size)
        self.hif = 1/N.sqrt(self.n_hid_per_pixel)

        if alloc:
            w1shp = (self.input_size, self.n_hid_per_pixel)
            w2shp = list(reversed(w1shp))
            
            obj.w1 = self.R.uniform(size=w1shp, low = -self.inf, high = self.inf)
            if not self.tie_weights:
                obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
           
            obj.b1 = N.zeros(self.n_hid_per_pixel)
            obj.b2 = N.zeros(self.input_size)



    #TODO: these should be made generic
    ############## HELPER FUNCTIONS #####################
    def reconstruction_costs(self, output):
        # the target is always the *clean* input, even on the noisy pass:
        # the denoising criterion reconstructs the original from its corruption
        return self.reconstruction_cost_function(self.input, output)
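
# A minimal usage sketch for the base class (mirrors debug() below; assumes
# the deprecated theano.Module API available at this revision):
#
#   model = DAA(img_shape=(3, 3), n_hid_per_pixel=1, batch_size=10)
#   model.init_behavioural()        # build clean/noisy graphs and methods
#   daa = model.make(lr=0.1)        # compile an instance
#   daa.update(x)                   # one SGD step on the denoising cost
#   daa.reconstruction(x)           # decode from the clean hidden code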


##############################################
#              QUADRATIC DAA                 #
##############################################
class QuadraticDAA(DAA):

    def __init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters=0,
                 batch_size=4, regularize = True, hid_fn=NN.sigmoid,
                 reconstruction_cost_function=cost.cross_entropy, **init):

        # set tied-weights to False for QDAAs
        super(QuadraticDAA, self).__init__(img_shape, n_hid_per_pixel, batch_size, regularize,
                False, hid_fn, reconstruction_cost_function, **init)

        self.n_quadratic_filters = n_quadratic_filters
        self.qfilters = [theano.Member(T.dmatrix()) \
                for i in xrange(n_quadratic_filters)]

    # TODO: verify with James that the formula is correct (without sqrt)
    def define_propup(self, container, input):
        if self.n_quadratic_filters:
            qsum = 0
            for qf in self.qfilters:
                qsum = qsum + T.dot(input, qf)**2
            container.hidden_activation = T.dot(input, self.w1) + self.b1 + qsum
        else:
            container.hidden_activation = T.dot(input, self.w1) + self.b1
        container.hidden = self.hid_fn(container.hidden_activation)
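
    # With quadratic filters the hidden pre-activation becomes
    #   a = x W1 + b1 + sum_k (x Q_k)**2
    # i.e. a squared response per quadratic filter Q_k is added to the
    # linear term (no sqrt, per the TODO above).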

    def define_gradients(self):
        self.params = self.qfilters
        DAA.define_gradients(self)

    def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):
        # only call constructor of base-class if we are instantiating QuadraticDAA
        if self.__class__ == QuadraticDAA:
            super(QuadraticDAA, self)._instance_initialize(obj, lr, seed, **init)
        obj.qfilters = [self.R.uniform(size=obj.w1.shape, low=-self.inf, high=self.inf)*\
                qfilter_relscale for qf in self.qfilters]



##############################################
#        SPARSE QUADRATIC DAA                #
##############################################
class SparseQuadraticDAA(QuadraticDAA):

    def __init__(self, img_shape, n_hid_per_pixel,
                 filter_shape, step_size=(1,1), conv_mode='valid',
                 n_quadratic_filters=0, batch_size=4,
                 regularize = True, hid_fn=NN.sigmoid,
                 reconstruction_cost_function=cost.cross_entropy, **init):

        QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters,
                batch_size, regularize, hid_fn, reconstruction_cost_function, **init)
       
        # need to override parameters for sparse operations (vector instead of matrix)
        self.w1 = theano.Member(T.dvector())
        self.w2 = theano.Member(T.dmatrix())
        self.qfilters = [theano.Member(T.dvector()) for i in xrange(n_quadratic_filters)]

        self.filter_shape = filter_shape
        self.step_size = step_size
        self.conv_mode = conv_mode

    def define_propup(self, container, input):

        lin_hid_activ, self.hid_shape = sp.applySparseFilter(\
                self.w1, self.filter_shape, self.n_hid_per_pixel,
                input, self.img_shape, self.step_size, self.conv_mode)
        self.nl1feats = N.prod(self.hid_shape)

        # apply quadratic filters
        qsum = 0
        for qf in self.qfilters:
            temp, hidshape = sp.applySparseFilter(qf, self.filter_shape,\
                    self.n_hid_per_pixel, input, self.img_shape,
                    self.step_size, self.conv_mode)
            qsum = qsum + temp**2
        quad_hid_activ = qsum

        hid_activ = lin_hid_activ + quad_hid_activ if self.n_quadratic_filters \
                    else lin_hid_activ

        container.hidden_activation = hid_activ
        container.hidden = self.hid_fn(container.hidden_activation)
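
    # Same quadratic expansion as QuadraticDAA.define_propup, but every
    # filter (linear and quadratic) is applied locally through
    # sp.applySparseFilter instead of a dense T.dot.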

    def define_propdown(self, container):
        # TODO: propdown is not implemented for the sparse model;
        # container.output is left undefined here
        pass

    def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):

        # change weight shapes based on sparse weight matrix parameters
        DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init)

        # override weight initialization
        w1shp = N.prod(self.hid_shape)*N.prod(self.filter_shape)
        w2shp = (N.prod(self.hid_shape), self.input_size)
        obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf)
        obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
        obj.b1 = N.zeros(w1shp)
        obj.b2 = N.zeros(w2shp[1])

        QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init)



##############################################
#       CONVOLUTIONAL QUADRATIC DAA          #
##############################################
class ConvQuadraticDAA(QuadraticDAA):

    def __init__(self, img_shape, n_hid_per_pixel,
                 filter_shape, step_size=(1,1), conv_mode='valid',
                 n_quadratic_filters=0, batch_size=4,
                 regularize = True, hid_fn=NN.sigmoid,
                 reconstruction_cost_function=cost.cross_entropy, **init):

        QuadraticDAA.__init__(self, img_shape, n_hid_per_pixel, n_quadratic_filters,
                batch_size, regularize, hid_fn, reconstruction_cost_function, **init)
       
        # need to override parameters for convolutional operations
        # (b1 becomes a matrix with one bias row per filter)
        self.w1 = theano.Member(T.dmatrix())
        self.w2 = theano.Member(T.dmatrix())
        self.b1 = theano.Member(T.dmatrix())
        self.qfilters = [theano.Member(T.dmatrix()) for i in xrange(n_quadratic_filters)]

        self.filter_shape = filter_shape
        self.step_size = step_size
        self.conv_mode = conv_mode

    def define_propup(self, container, input):

        lin_hid_activ, self.hid_shape = sp.convolve(self.w1, self.filter_shape,
                self.n_hid_per_pixel, input, self.img_shape, self.step_size, 
                self.conv_mode, flatten=False)
        self.nl1feats = N.prod(self.hid_shape)

        # apply quadratic filters
        qsum = 0
        for qf in self.qfilters:
            temp, hidshape = sp.convolve(qf, self.filter_shape, self.n_hid_per_pixel,
                    input, self.img_shape, self.step_size, self.conv_mode, flatten=False)
            qsum = qsum + temp**2
        quad_hid_activ = qsum

        hid_activ = lin_hid_activ + quad_hid_activ if self.n_quadratic_filters \
                    else lin_hid_activ

        container.hidden_activation = hid_activ
        container.hidden = T.flatten(self.hid_fn(container.hidden_activation), 2)
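
    # Convolutional variant of the quadratic propup: filters are applied with
    # sp.convolve and the per-filter feature maps are flattened back to one
    # hidden row per example by T.flatten(..., 2).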

    def define_propdown(self, container):
        # TODO: propdown is not implemented for the convolutional model;
        # container.output is left undefined here
        pass


    def _instance_initialize(self, obj, lr, seed=1, qfilter_relscale=.01, **init):

        # change weight shapes based on sparse weight matrix parameters
        DAA._instance_initialize(self, obj, lr, seed, alloc=False, **init)

        # override weight initialization
        w1shp = (self.n_hid_per_pixel, N.prod(self.filter_shape))
        w2shp = (N.prod(self.hid_shape), self.input_size)
        obj.w1 = self.R.uniform(size=w1shp, low=-self.inf, high=self.inf)
        obj.w2 = self.R.uniform(size=w2shp, low=-self.hif, high=self.hif)
        obj.b1 = N.zeros((self.n_hid_per_pixel,1))
        obj.b2 = N.zeros(w2shp[1])

        QuadraticDAA._instance_initialize(self, obj, lr, seed, qfilter_relscale, **init)


##############################################
#                 TEST CODE
##############################################
def debug():
    img_shape = (3,3)
    n_hid_per_pixel = 1
    filter_shape = (2,2)
    step_size = (1,1)
    conv_mode = 'full'
    batch_size = 10

    R = N.random.RandomState(100)
    data = R.random_integers(0, 1, size=(batch_size, N.prod(img_shape)))

    print 'Instantiating DAA...',
    daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size)
    daa_model.init_behavioural()
    daa = daa_model.make(lr=0.1)
    daa.update(data)
    print 'done'

    print 'Instantiating QuadraticDAA...',
    qdaa_model = QuadraticDAA(img_shape, n_hid_per_pixel,
            n_quadratic_filters=1, batch_size=batch_size)
    qdaa_model.init_behavioural()
    qdaa = qdaa_model.make(lr=0.1)
    qdaa.update(data)
    print 'done'

    print 'Instantiating SparseQuadraticDAA...',
    sp_qdaa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel,
            filter_shape, step_size, conv_mode, 
            n_quadratic_filters=1, batch_size=batch_size)
    sp_qdaa_model.init_behavioural()
    sp_qdaa = sp_qdaa_model.make(lr=0.1)
    sp_qdaa.representation(data)
    sp_qdaa.reconstruction(data)
    sp_qdaa.update(data)
    print 'done!'

    print 'Instantiating ConvQuadraticDAA...',
    conv_qdaa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel,
            filter_shape, step_size, conv_mode, 
            n_quadratic_filters=1, batch_size=batch_size)
    conv_qdaa_model.init_behavioural()
    conv_qdaa = conv_qdaa_model.make(lr=0.1)
    conv_qdaa.representation(data)
    conv_qdaa.reconstruction(data)
    conv_qdaa.update(data)
    print 'done!'
   
def test():
    
    from pylearn.datasets import MNIST
    from pylearn.datasets import make_dataset
    import pylab as pl

    def showimg(x,y):
        for i in range(batch_size):
            pl.subplot(2, batch_size, i+1)
            pl.gray()
            pl.axis('off')
            pl.imshow(x[i,:].reshape(img_shape))
            pl.subplot(2, batch_size, batch_size+i+1)
            pl.gray()
            pl.axis('off')
            pl.imshow(y[i,:].reshape(img_shape))
        pl.show()

    img_shape = (28,28)
    n_hid_per_pixel = 1
    n_quadratic_filters = 0
    batch_size = 4
    epochs = 50
    lr = .01
    filter_shape = (5,5)
    step_size = (2,2)
    conv_mode = 'valid'

    dataset = make_dataset('MNIST',variant='1k')

    #print 'Instantiating DAA...',
    #daa_model = DAA(img_shape, n_hid_per_pixel, batch_size=batch_size, regularize=False)
    #daa_model.init_behavioural()
    #daa = daa_model.make(lr=lr, mode='FAST_RUN')
    #print 'done'

    #print 'Instantiating QuadraticDAA...',
    #daa_model = QuadraticDAA(img_shape, n_hid_per_pixel,
            #n_quadratic_filters=n_quadratic_filters, batch_size=batch_size)
    #daa_model.init_behavioural()
    #daa = daa_model.make(lr=0.1, mode='FAST_RUN')

    print 'Instantiating SparseQuadraticDAA...',
    daa_model = SparseQuadraticDAA(img_shape, n_hid_per_pixel,
            filter_shape, step_size, conv_mode, 
            n_quadratic_filters=n_quadratic_filters, batch_size=batch_size)
    daa_model.init_behavioural()
    daa = daa_model.make(lr=0.1, mode='FAST_RUN')

    #print 'Instantiating ConvQuadraticDAA...',
    #daa_model = ConvQuadraticDAA(img_shape, n_hid_per_pixel,
            #filter_shape, step_size, conv_mode, 
            #n_quadratic_filters=1, batch_size=batch_size)
    #daa_model.init_behavioural()
    #daa = daa_model.make(lr=0.1, mode='FAST_RUN')
 
    for ep in range(epochs):
        print '********** Epoch %i *********' % ep
        imgi=0
        for i in range(dataset.train.x.shape[0]/batch_size):
            x = dataset.train.x[imgi:imgi+batch_size,:]
            print daa.update(x)
            imgi += batch_size

        if (ep+1) % 1 == 0:
            starti = int(N.floor(N.random.rand()*(1000-batch_size)))
            x = dataset.train.x[starti:starti+batch_size,:]
            x_rec = daa.reconstruction(x)
            showimg(x,x_rec)

if __name__ == '__main__':
    test()