ift6266: diff deep/stacked_dae/stacked_dae.py @ 275:7b4507295eba
merge
author   | Xavier Glorot <glorotxa@iro.umontreal.ca>
---------|------------------------------------------
date     | Mon, 22 Mar 2010 10:20:10 -0400
parents  | acb942530923 c8fe09a65039
children |
```diff
--- a/deep/stacked_dae/stacked_dae.py	Mon Mar 22 10:19:45 2010 -0400
+++ b/deep/stacked_dae/stacked_dae.py	Mon Mar 22 10:20:10 2010 -0400
@@ -127,13 +127,13 @@
         # this will produce an array of 0s and 1s where 1 has a
         # probability of 1 - ``corruption_level`` and 0 with
         # ``corruption_level``
-        self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
+        self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         #        used later when stacking dAs.
         self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
         # Equation (3)
-        self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+        #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
@@ -141,17 +141,20 @@
         #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
 
         # bypassing z to avoid running to log(0)
-        #self.z_a = T.dot(self.y, self.W_prime) + self.b_prime)
-        #self.L = -T.sum( self.x * (T.log(1)-T.log(1+T.exp(-self.z_a))) \
-        #        + (1.0-self.x) * (T.log(1)-T.log(1+T.exp(-self.z_a))), axis=1 )
+        z_a = T.dot(self.y, self.W_prime) + self.b_prime
+        log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
+        # log(1-sigmoid(z_a))
+        log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a))
+        self.L = -T.sum( self.x * (log_sigmoid) \
+                        + (1.0-self.x) * (log_1_sigmoid), axis=1 )
 
         # I added this epsilon to avoid getting log(0) and 1/0 in grad
         # This means conceptually that there'd be no probability of 0, but that
         # doesn't seem to me as important (maybe I'm wrong?).
-        eps = 0.00000001
-        eps_1 = 1-eps
-        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
-                        + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
+        #eps = 0.00000001
+        #eps_1 = 1-eps
+        #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+        #                + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
```
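The rewritten reconstruction cost in the second hunk bypasses `self.z` so that neither `log(0)` nor a `1/0` in the gradient can occur: it builds `log(sigmoid(z_a))` and `log(1 - sigmoid(z_a))` directly from the pre-activation `z_a`, using the identities log(sigmoid(a)) = -log(1 + exp(-a)) and log(1 - sigmoid(a)) = -a - log(1 + exp(-a)). Below is a minimal NumPy sketch (not part of the changeset; the target vector `x` is made up) that checks those identities and shows where the naive form breaks down:

```python
import numpy as np

# Identities used by the new loss (np.logaddexp(0, -a) = log(1 + exp(-a)),
# computed without overflow):
#   log(sigmoid(a))     = -log(1 + exp(-a))
#   log(1 - sigmoid(a)) = -a - log(1 + exp(-a))
a = np.array([-4.0, -1.0, 0.0, 1.0, 4.0])
sig = 1.0 / (1.0 + np.exp(-a))

log_sig = -np.logaddexp(0.0, -a)          # log(sigmoid(a))
log_1_sig = -a - np.logaddexp(0.0, -a)    # log(1 - sigmoid(a))

assert np.allclose(log_sig, np.log(sig))
assert np.allclose(log_1_sig, np.log(1.0 - sig))

# For large pre-activations the naive form hits log(0) = -inf, while the
# rewritten one stays finite:
with np.errstate(divide='ignore'):
    print(np.log(1.0 - 1.0 / (1.0 + np.exp(-40.0))))   # -inf: sigmoid(40) rounds to 1
print(-40.0 - np.logaddexp(0.0, -40.0))                # about -40

# The reconstruction cost written the stable way, as in self.L
# (x is a hypothetical target row, a its pre-activation):
x = np.array([0.0, 1.0, 0.5, 1.0, 0.0])
L = -np.sum(x * log_sig + (1.0 - x) * log_1_sig)
print(L)
```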
```diff
@@ -162,9 +165,9 @@
 
 
 class SdA(object):
-    def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
+    def __init__(self, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
-                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+                 corruption_levels, rng, pretrain_lr, finetune_lr):
         # Just to make sure those are not modified somewhere else afterwards
         hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
         corruption_levels = copy.deepcopy(corruption_levels)
@@ -187,17 +190,14 @@
         print "n_outs", n_outs
         print "pretrain_lr", pretrain_lr
         print "finetune_lr", finetune_lr
-        print "input_divider", input_divider
         print "----"
 
-        self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
-
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
 
         # allocate symbolic variables for the data
-        index   = T.lscalar()    # index to a [mini]batch
+        #index   = T.lscalar()    # index to a [mini]batch
         self.x  = T.matrix('x')  # the data is presented as rasterized images
         self.y  = T.ivector('y') # the labels are presented as 1D vector of
                                  # [int] labels
@@ -244,10 +244,15 @@
                 updates[param] = param - gparam * pretrain_lr
 
             # create a function that trains the dA
-            update_fn = theano.function([index], dA_layer.cost, \
-                  updates = updates,
-                  givens = {
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
+            update_fn = theano.function([self.x], dA_layer.cost, \
+                  updates = updates)#,
+            #      givens = {
+            #         self.x : ensemble})
+            # collect this function into a list
+            #update_fn = theano.function([index], dA_layer.cost, \
+            #      updates = updates,
+            #      givens = {
+            #         self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
             # collect this function into a list
             self.pretrain_functions += [update_fn]
 
@@ -270,11 +275,11 @@
         for param,gparam in zip(self.params, gparams):
             updates[param] = param - gparam*finetune_lr
 
-        self.finetune = theano.function([index], cost,
-                updates = updates,
-                givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
-                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+        self.finetune = theano.function([self.x,self.y], cost,
+                updates = updates)#,
+        #        givens = {
+        #          self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
+        #          self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
         # symbolic variable that points to the number of errors made on the
         # minibatch given by self.x and self.y
```
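Both rewritten `theano.function` calls drop the `index`/`givens` lookup into a shared dataset and instead take the minibatch (and the labels, for `self.finetune`) as explicit inputs, which is why `train_set_x`, `train_set_y` and `input_divider` disappear from `SdA.__init__`. The sketch below (not the repository's code; the quadratic cost, `n_in`, and the random dataset are stand-ins for illustration) contrasts the two compilation styles:

```python
import numpy
import theano
import theano.tensor as T

# Toy dataset and a single weight matrix; the squared-error cost is only a
# stand-in for dA_layer.cost / the fine-tuning cost.
rng = numpy.random.RandomState(1234)
n_in, batch_size = 32, 20
data = numpy.asarray(rng.uniform(size=(100, n_in)), dtype=theano.config.floatX)

x = T.matrix('x')
W = theano.shared(numpy.zeros((n_in, n_in), dtype=theano.config.floatX), name='W')
cost = T.mean((T.dot(x, W) - x) ** 2)
updates = {W: W - 0.01 * T.grad(cost, W)}

# Old style (removed by this changeset): the whole dataset lives in a shared
# variable and the compiled function is driven by a minibatch index via givens.
train_set_x = theano.shared(data)
index = T.lscalar('index')
train_indexed = theano.function(
    [index], cost, updates=updates,
    givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

# New style (what the changeset switches to): the caller passes the minibatch
# itself, so no shared dataset or input divider has to be baked into the graph.
train_direct = theano.function([x], cost, updates=updates)

print(train_indexed(0))
print(train_direct(data[:batch_size]))
```

The direct-input style trades the speed of keeping the whole dataset resident on the device for the flexibility of feeding minibatches that are generated or preprocessed at call time.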