view deep/convolutional_dae/stacked_convolutional_dae.py @ 266:1e4e60ddadb1

Merge. Ah, and in the last commit I forgot to mention that I added code to handle isolating different clones, so experiments can be run while the code is being modified at the same time.
author fsavard
date Fri, 19 Mar 2010 10:56:16 -0400
parents 0c0f0b3f6a93

import numpy
import theano
import time
import sys
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
#import theano.sandbox.softsign

from theano.tensor.signal import downsample
from theano.tensor.nnet import conv 

from ift6266 import datasets
from ift6266.baseline.log_reg.log_reg import LogisticRegression

batch_size = 100

class SigmoidalLayer(object):
    def __init__(self, rng, input, n_in, n_out):

        self.input = input
 
        W_values = numpy.asarray( rng.uniform( \
              low = -numpy.sqrt(6./(n_in+n_out)), \
              high = numpy.sqrt(6./(n_in+n_out)), \
              size = (n_in, n_out)), dtype = theano.config.floatX)
        self.W = theano.shared(value = W_values)
 
        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
        self.b = theano.shared(value= b_values)
 
        self.output = T.tanh(T.dot(input, self.W) + self.b)
        self.params = [self.W, self.b]
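
# A minimal usage sketch (the helper name and the 500-unit size are illustrative
# only): build one fully-connected tanh layer on 28x28 inputs and compile a
# function returning its activations. The +/- sqrt(6/(n_in+n_out)) range above
# is the uniform initialization commonly paired with tanh units.
def _sigmoidal_layer_example():
    # hypothetical sizes, for demonstration only
    rng = numpy.random.RandomState(1234)
    x = T.matrix('x')
    layer = SigmoidalLayer(rng, x, n_in=28*28, n_out=500)
    f = theano.function([x], layer.output)
    data = numpy.random.rand(10, 28*28).astype(theano.config.floatX)
    return f(data)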
 
class dA_conv(object):
 
  def __init__(self, input, filter_shape, corruption_level = 0.1, 
               shared_W = None, shared_b = None, image_shape = None, 
               poolsize = (2,2)):

    theano_rng = RandomStreams()
    
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])

    center = theano.shared(value = 1, name="center")
    scale = theano.shared(value = 2, name="scale")

    if shared_W is not None and shared_b is not None:
        self.W = shared_W
        self.b = shared_b
    else:
        initial_W = numpy.asarray( numpy.random.uniform(
              low = -numpy.sqrt(6./(fan_in+fan_out)),
              high = numpy.sqrt(6./(fan_in+fan_out)),
              size = filter_shape), dtype = theano.config.floatX)
        initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.W = theano.shared(value = initial_W, name = "W")
        self.b = theano.shared(value = initial_b, name = "b")
    
 
    initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX)

    self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
 
    self.x = input

    # corrupt the input: zero out each unit independently with probability
    # corruption_level (multiply by a binomial mask)
    self.tilde_x = theano_rng.binomial(self.x.shape, 1, 1 - corruption_level,
                                       dtype=theano.config.floatX) * self.x

    conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
                            image_shape=image_shape, border_mode='valid')
    
    self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    
    da_filter_shape = [ filter_shape[1], filter_shape[0], 
                        filter_shape[2], filter_shape[3] ]
    initial_W_prime =  numpy.asarray( numpy.random.uniform( \
              low = -numpy.sqrt(6./(fan_in+fan_out)), \
              high = numpy.sqrt(6./(fan_in+fan_out)), \
              size = da_filter_shape), dtype = theano.config.floatX)
    self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")

    conv2_out = conv.conv2d(self.y, self.W_prime,
                            filter_shape = da_filter_shape,
                            border_mode='full')

    # map the tanh reconstruction and the input into (0, 1) so that a
    # cross-entropy reconstruction cost is well defined
    self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x')) + center) / scale

    scaled_x = (self.x + center) / scale

    # cross-entropy between the rescaled input and its reconstruction
    self.L = - T.sum(scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1)

    self.cost = T.mean(self.L)

    self.params = [ self.W, self.b, self.b_prime ] 
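
# Shape walk-through (illustrative numbers): with filter_shape = (4, 1, 5, 5)
# and 28x28 single-channel inputs, the corrupted input tilde_x is encoded by a
# 'valid' convolution into a code y of shape (batch, 4, 24, 24); decoding with
# the transposed filter shape (1, 4, 5, 5) in 'full' mode brings the
# reconstruction z back to (batch, 1, 28, 28), so the cross-entropy between z
# and the input (both rescaled from tanh's (-1, 1) range into (0, 1)) is
# well defined.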

class LeNetConvPoolLayer(object):

    def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
        self.input = input
  
        # initialize the filters with uniform values in +/- sqrt(6/(fan_in+fan_out))
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
        W_bound = numpy.sqrt(6./(fan_in + fan_out))

        W_values = numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values)

        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values)

        conv_out = conv.conv2d(input, self.W,
                filter_shape=filter_shape, image_shape=image_shape)

        # downsample each feature map with (non-overlapping) max-pooling
        pooled_out = downsample.max_pool_2d(conv_out, poolsize, ignore_border=True)
 
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.W, self.b]
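
# Shape note (illustrative numbers): for a (batch, 1, 28, 28) input, filters of
# shape (4, 1, 5, 5) and (2, 2) max-pooling, the 'valid' convolution gives
# (batch, 4, 24, 24) and pooling halves each spatial dimension, so the layer
# outputs feature maps of shape (batch, 4, 12, 12).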
 

class SdA(object):
    def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes,
                 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, 
                 pretrain_lr, finetune_lr, img_shape):
        
        self.layers = []
        self.pretrain_functions = []
        self.params = []
        self.conv_n_layers = len(conv_hidden_layers_sizes)
        self.mlp_n_layers = len(mlp_hidden_layers_sizes)
        
        self.x = T.matrix('x') # the data is presented as rasterized images
        self.y = T.ivector('y') # the labels are presented as a 1D vector of [int] labels
        
        for i in xrange( self.conv_n_layers ):
            filter_shape=conv_hidden_layers_sizes[i][0]
            image_shape=conv_hidden_layers_sizes[i][1]
            max_poolsize=conv_hidden_layers_sizes[i][2]
                
            if i == 0 :
                layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape)
            else:
                layer_input=self.layers[-1].output
            
            layer = LeNetConvPoolLayer(rng, input=layer_input,
                                       image_shape=image_shape,
                                       filter_shape=filter_shape,
                                       poolsize=max_poolsize)
            print 'Convolutional layer', str(i+1), 'created'
            
            self.layers += [layer]
            self.params += layer.params

            da_layer = dA_conv(corruption_level = corruption_levels[i],
                               input = layer_input,
                               shared_W = layer.W, shared_b = layer.b,
                               filter_shape = filter_shape,
                               image_shape = image_shape )
            
            gparams = T.grad(da_layer.cost, da_layer.params)
            
            updates = {}
            for param, gparam in zip(da_layer.params, gparams):
                updates[param] = param - gparam * pretrain_lr
            
            update_fn = theano.function([self.x], da_layer.cost, updates = updates)
            
            self.pretrain_functions += [update_fn]
            
        for i in xrange( self.mlp_n_layers ): 
            if i == 0 :
                input_size = n_ins_mlp
            else:
                input_size = mlp_hidden_layers_sizes[i-1]
            
            if i == 0 :
                if len( self.layers ) == 0 :
                    layer_input=self.x
                else :
                    layer_input = self.layers[-1].output.flatten(2)
            else:
                layer_input = self.layers[-1].output
            
            layer = SigmoidalLayer(rng, layer_input, input_size,
                                        mlp_hidden_layers_sizes[i] )
            
            self.layers += [layer]
            self.params += layer.params
            
            print 'MLP layer', str(i+1), 'created'
            
        self.logLayer = LogisticRegression(input=self.layers[-1].output, \
                                                     n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
        self.params += self.logLayer.params
        
        cost = self.logLayer.negative_log_likelihood(self.y)
        
        gparams = T.grad(cost, self.params)

        updates = {}
        for param,gparam in zip(self.params, gparams):
            updates[param] = param - gparam*finetune_lr
        
        self.finetune = theano.function([self.x, self.y], cost, updates = updates)
        
        self.errors = self.logLayer.errors(self.y)
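
# Construction sketch: conv_hidden_layers_sizes takes one entry per
# convolutional stage, each of the form [filter_shape, image_shape, poolsize],
# e.g. (illustrative values matching sgd_optimization_mnist below)
#   [[[4, 1, 5, 5], None, [2, 2]],
#    [[4, 4, 3, 3], None, [2, 2]]]
# Each stage yields a LeNetConvPoolLayer used at fine-tuning time plus a
# dA_conv sharing its W and b that provides the layer-wise pre-training
# function; the MLP part then stacks SigmoidalLayer(s) on the flattened
# convolutional features, with a LogisticRegression output layer on top.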

def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1,
                           pretrain_lr = 0.1, training_epochs = 1000,
                           kernels = [[4,5,5], [4,3,3]], mlp_layers=[500],
                           corruption_levels = [0.2, 0.2, 0.2], 
                           batch_size = batch_size, img_shape=(28, 28),
                           max_pool_layers = [[2,2], [2,2]],
                           dataset=datasets.mnist(5000)):
 
    # allocate symbolic variables for the data
    index = T.lscalar() # index to a [mini]batch
    x = T.matrix('x') # the data is presented as rasterized images
    y = T.ivector('y') # the labels are presented as a 1D vector of [int] labels

    layer0_input = x.reshape((x.shape[0],1)+img_shape)
    
    rng = numpy.random.RandomState(1234)
    conv_layers=[]
    init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]],
                  None, # do not specify the batch size since it can 
                        # change for the last one and then theano will 
                        # crash.
                  max_pool_layers[0]]
    conv_layers.append(init_layer)

    conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0]

    for i in range(1,len(kernels)):    
        layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]],
                 None, # same comment as for init_layer
                 max_pool_layers[i] ]
        conv_layers.append(layer)
        conv_n_out =  (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
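    # Worked example with the default arguments: for 28x28 images, the first
    # stage (5x5 kernels, 2x2 pooling) gives (28-5+1)/2 = 12, the second stage
    # (3x3 kernels, 2x2 pooling) gives (12-3+1)/2 = 5, so the MLP input size is
    # kernels[-1][0] * conv_n_out**2 = 4 * 5 * 5 = 100 features.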

    network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                  conv_hidden_layers_sizes = conv_layers,
                  mlp_hidden_layers_sizes = mlp_layers,
                  corruption_levels = corruption_levels, n_out = 62,
                  rng = rng , pretrain_lr = pretrain_lr,
                  finetune_lr = learning_rate, img_shape=img_shape)

    test_model = theano.function([network.x, network.y], network.errors)
 
    start_time = time.clock()
    for i in xrange(len(network.layers)-len(mlp_layers)):
        for epoch in xrange(pretraining_epochs):
            for x, y in dataset.train(batch_size):
                c = network.pretrain_functions[i](x)
            print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c

    patience = 10000 # look at this many examples regardless
    patience_increase = 2. # wait this much longer when a new best
                           # is found
    improvement_threshold = 0.995 # a relative improvement of this much is
                                  # considered significant

    validation_frequency = patience/2
 
    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()
 
    done_looping = False
    epoch = 0
    iter = 0
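    # Early stopping: validate every validation_frequency (= patience/2 = 5000)
    # updates; whenever the validation error improves by more than the 0.5%
    # relative improvement_threshold, patience is pushed out to twice the
    # current iteration count, and training stops once iter exceeds patience
    # or training_epochs is reached.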

    while (epoch < training_epochs) and (not done_looping):
      epoch = epoch + 1
      for x, y in dataset.train(batch_size):
 
        cost_ij = network.finetune(x, y)
        iter += 1
        
        if iter % validation_frequency == 0:
            validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)]
            this_validation_loss = numpy.mean(validation_losses)
            print('epoch %i, iter %i, validation error %f %%' % \
                   (epoch, iter, this_validation_loss*100.))
            
            # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:
 
                #improve patience if loss improvement is good enough
                if this_validation_loss < best_validation_loss * \
                       improvement_threshold :
                    patience = max(patience, iter * patience_increase)
                
                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                best_iter = iter
                
                # test it on the test set
                test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
                test_score = numpy.mean(test_losses)
                print((' epoch %i, iter %i, test error of best '
                      'model %f %%') %
                             (epoch, iter, test_score*100.))
                
        if patience <= iter :
            done_looping = True
            break
    
    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
                 (best_validation_loss * 100., test_score*100.))
    print('The code ran for %f minutes' % ((end_time-start_time)/60.))
 
if __name__ == '__main__':
    sgd_optimization_mnist()