comparison deep/stacked_dae/stacked_dae.py @ 275:7b4507295eba

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Mon, 22 Mar 2010 10:20:10 -0400
parents acb942530923 c8fe09a65039
children
comparison: 274:44409b6652aa -> 275:7b4507295eba

diff -r 44409b6652aa -r 7b4507295eba deep/stacked_dae/stacked_dae.py
@@ -125,39 +125,42 @@
         # third argument is the probability of success of any trial
         #
         # this will produce an array of 0s and 1s where 1 has a
         # probability of 1 - ``corruption_level`` and 0 with
         # ``corruption_level``
-        self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
+        self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         #        used later when stacking dAs.
         self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
         # Equation (3)
-        self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+        #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
         #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
         #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
 
         # bypassing z to avoid running to log(0)
-        #self.z_a = T.dot(self.y, self.W_prime) + self.b_prime)
-        #self.L = -T.sum( self.x * (T.log(1)-T.log(1+T.exp(-self.z_a))) \
-        #        + (1.0-self.x) * (T.log(1)-T.log(1+T.exp(-self.z_a))), axis=1 )
+        z_a = T.dot(self.y, self.W_prime) + self.b_prime
+        log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
+        # log(1-sigmoid(z_a))
+        log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a))
+        self.L = -T.sum( self.x * (log_sigmoid) \
+                        + (1.0-self.x) * (log_1_sigmoid), axis=1 )
 
         # I added this epsilon to avoid getting log(0) and 1/0 in grad
         # This means conceptually that there'd be no probability of 0, but that
         # doesn't seem to me as important (maybe I'm wrong?).
-        eps = 0.00000001
-        eps_1 = 1-eps
-        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
-                        + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
+        #eps = 0.00000001
+        #eps_1 = 1-eps
+        #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+        #        + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
         #        the cost of the minibatch
         self.cost = T.mean(self.L)
 
         self.params = [ self.W, self.b, self.b_prime ]
 
 
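The hunk above replaces the epsilon-guarded cross-entropy with one written directly from the pre-sigmoid activation z_a, relying on the identities log(sigmoid(a)) = -log(1 + exp(-a)) and log(1 - sigmoid(a)) = -a - log(1 + exp(-a)). A minimal numpy sketch, not part of the repository, showing that the identities hold and why forming z first eventually hits log(0):

import numpy

a = numpy.array([-5.0, 0.0, 5.0, 40.0])   # pre-sigmoid activations; 40. saturates float64
z = 1.0 / (1.0 + numpy.exp(-a))           # sigmoid(a); the last entry rounds to exactly 1.0

# terms computed from z, as the old epsilon-guarded code effectively did
with numpy.errstate(divide='ignore'):
    naive_log_z = numpy.log(z)
    naive_log_1_z = numpy.log(1.0 - z)    # -inf at a = 40

# terms computed from the activation itself, as the new code does
log_sigmoid = -numpy.log1p(numpy.exp(-a))        # log(sigmoid(a))
log_1_sigmoid = -a - numpy.log1p(numpy.exp(-a))  # log(1 - sigmoid(a))

print(numpy.allclose(naive_log_z[:3], log_sigmoid[:3]))      # True: identical away from saturation
print(numpy.allclose(naive_log_1_z[:3], log_1_sigmoid[:3]))  # True
print(naive_log_1_z[3], log_1_sigmoid[3])                    # -inf vs. -40.0: no epsilon needed
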
@@ -164,9 +167,9 @@
 class SdA(object):
-    def __init__(self, train_set_x, train_set_y, batch_size, n_ins, 
+    def __init__(self, batch_size, n_ins, 
                  hidden_layers_sizes, n_outs, 
-                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+                 corruption_levels, rng, pretrain_lr, finetune_lr):
         # Just to make sure those are not modified somewhere else afterwards
         hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
         corruption_levels = copy.deepcopy(corruption_levels)
 
         update_locals(self, locals())
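With the new signature the training data, labels and input_divider are no longer bound at construction time; the SdA only receives architecture and learning-rate hyperparameters. A hypothetical instantiation under the new interface (all argument values below are illustrative, not taken from this revision):

import numpy

rng = numpy.random.RandomState(1234)

# illustrative hyperparameters only; the class itself imposes no particular sizes
sda = SdA(batch_size=20, n_ins=32 * 32,
          hidden_layers_sizes=[1000, 1000, 1000], n_outs=62,
          corruption_levels=[0.2, 0.2, 0.2], rng=rng,
          pretrain_lr=0.01, finetune_lr=0.01)
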
@@ -185,21 +188,18 @@
         print "corruption_levels", corruption_levels
         print "n_ins", n_ins
         print "n_outs", n_outs
         print "pretrain_lr", pretrain_lr
         print "finetune_lr", finetune_lr
-        print "input_divider", input_divider
         print "----"
-
-        self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
 
 
         # allocate symbolic variables for the data
-        index = T.lscalar()    # index to a [mini]batch 
+        #index = T.lscalar()    # index to a [mini]batch 
         self.x = T.matrix('x')  # the data is presented as rasterized images
         self.y = T.ivector('y') # the labels are presented as 1D vector of 
                                 # [int] labels
 
         for i in xrange( self.n_layers ):
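Commenting out index reflects the switch from the givens-based minibatch pattern to feeding minibatches straight into the compiled functions. A toy sketch of the two styles, independent of this class and assuming Theano is installed:

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
out = T.sum(x ** 2)

data = numpy.random.rand(100, 5).astype(theano.config.floatX)
shared_data = theano.shared(data)
batch_size = 10

# old style: the function takes a minibatch index and slices a shared dataset via givens
index = T.lscalar('index')
f_givens = theano.function([index], out,
        givens={x: shared_data[index * batch_size:(index + 1) * batch_size]})

# new style: the function takes the minibatch itself as its input
f_direct = theano.function([x], out)

print(f_givens(0))
print(f_direct(data[0:batch_size]))   # same value as above
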
@@ -242,14 +242,19 @@
             updates = {}
             for param, gparam in zip(dA_layer.params, gparams):
                 updates[param] = param - gparam * pretrain_lr
 
             # create a function that trains the dA
-            update_fn = theano.function([index], dA_layer.cost, \
-                updates = updates,
-                givens = {
-                    self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
+            update_fn = theano.function([self.x], dA_layer.cost, \
+                updates = updates)#,
+            #    givens = {
+            #        self.x : ensemble})
+            # collect this function into a list
+            #update_fn = theano.function([index], dA_layer.cost, \
+            #    updates = updates,
+            #    givens = {
+            #        self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
             # collect this function into a list
             self.pretrain_functions += [update_fn]
 
 
         # We now need to add a logistic layer on top of the MLP
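Each compiled pretraining function therefore now takes the minibatch itself instead of a minibatch index, and any scaling that shared_divider used to apply must be done by the caller. A hypothetical driver loop under those assumptions (sda, train_set_x, batch_size, pretraining_epochs and input_divider are the caller's objects, not part of this file):

import numpy

n_train_batches = train_set_x.shape[0] // batch_size

for layer_idx, pretrain_fn in enumerate(sda.pretrain_functions):
    for epoch in range(pretraining_epochs):
        costs = []
        for b in range(n_train_batches):
            # batch is assumed to already have dtype theano.config.floatX
            batch = train_set_x[b * batch_size:(b + 1) * batch_size]
            # the scaling previously done through shared_divider now happens here
            costs.append(pretrain_fn(batch / input_divider))
        print('layer %i, epoch %i, mean cost %f' % (layer_idx, epoch, numpy.mean(costs)))
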
@@ -268,15 +273,15 @@
         # compute list of updates
         updates = {}
         for param,gparam in zip(self.params, gparams):
             updates[param] = param - gparam*finetune_lr
 
-        self.finetune = theano.function([index], cost, 
-                updates = updates,
-                givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
-                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+        self.finetune = theano.function([self.x,self.y], cost, 
+                updates = updates)#,
+        #        givens = {
+        #          self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
+        #          self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
         # symbolic variable that points to the number of errors made on the
         # minibatch given by self.x and self.y
 
         self.errors = self.logLayer.errors(self.y)
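Similarly, self.finetune now expects the minibatch data and labels directly, while self.errors remains a symbolic expression. A hedged usage sketch; sda, x_batch, y_batch and the validation arrays are placeholders, and compiling self.errors into a callable is this sketch's assumption rather than something this revision does:

import theano

# x_batch has dtype theano.config.floatX, y_batch has dtype int32
finetune_cost = sda.finetune(x_batch, y_batch)

# wrap the symbolic error count/rate into a function for held-out evaluation
test_model = theano.function([sda.x, sda.y], sda.errors)
print(test_model(valid_x_batch, valid_y_batch))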