ift6266: comparison deep/stacked_dae/stacked_dae.py @ 207:43af74a348ac
Merge branches from main repo.
author   | Arnaud Bergeron <abergeron@gmail.com>
date     | Thu, 04 Mar 2010 20:43:21 -0500
parents  | e1f5f66dd7dd
children | acb942530923
--- deep/stacked_dae/stacked_dae.py  206:e12702b88a2d
+++ deep/stacked_dae/stacked_dae.py  207:43af74a348ac
@@ -7,10 +7,19 @@
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 import copy
 
 from utils import update_locals
+
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
 
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
         self.W = theano.shared( value=numpy.zeros((n_in,n_out),
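The helper added above relies on xlogy0, i.e. target*log(output) with the 0*log(0) case defined as 0, so a target that is exactly 0 or 1 does not turn the cross-entropy into NaN. A minimal NumPy sketch of that convention (illustrative only; xlogy0_np and binary_cross_entropy_np are not names from the repository):

import numpy as np

def xlogy0_np(x, y):
    # x * log(y), with the 0 * log(0) case defined as 0 (same convention as xlogy0).
    out = np.zeros_like(x, dtype=float)
    nz = x != 0
    out[nz] = x[nz] * np.log(y[nz])
    return out

def binary_cross_entropy_np(target, output, sum_axis=1):
    # Per-example cross-entropy, summed over features (no mean), mirroring the helper above.
    xe = xlogy0_np(target, output) + xlogy0_np(1 - target, 1 - output)
    return -xe.sum(axis=sum_axis)

t = np.array([[1.0, 0.0]])
o = np.array([[1.0, 0.0]])
print(binary_cross_entropy_np(t, o))   # [0.], whereas the naive formula gives NaN from 0*log(0)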
@@ -126,38 +135,57 @@
         # Equation (3)
         self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
-        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+        # I added this epsilon to avoid getting log(0) and 1/0 in grad
+        # This means conceptually that there'd be no probability of 0, but that
+        # doesn't seem to me as important (maybe I'm wrong?).
+        eps = 0.00000001
+        eps_1 = 1-eps
+        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+                          + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
         #        the cost of the minibatch
         self.cost = T.mean(self.L)
 
         self.params = [ self.W, self.b, self.b_prime ]
-
-
 
 
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
                  corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
         # Just to make sure those are not modified somewhere else afterwards
         hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
         corruption_levels = copy.deepcopy(corruption_levels)
+
         update_locals(self, locals())
 
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         # MODIF: added this so we also get the b_primes
         # (not used for finetuning... still using ".params")
         self.all_params = []
         self.n_layers = len(hidden_layers_sizes)
+
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "input_divider", input_divider
+        print "----"
 
         self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
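The replacement cost above squashes the reconstruction z from [0, 1] into [eps, 1 - eps] before taking logs, so neither log(0) nor a division by zero can appear when Theano differentiates the cost. A small NumPy sketch of the effect, assuming the same eps = 1e-8 (recon_cost is an illustrative name, not code from the changeset):

import numpy as np

def recon_cost(x, z, eps=1e-8):
    # Same clamping as in the changeset: z is pushed into [eps, 1 - eps],
    # so log() never sees 0 and the gradient terms 1/z and 1/(1 - z) stay finite.
    z_safe_pos = eps + (1 - eps) * z         # argument of log() for the x term
    z_safe_neg = eps + (1 - eps) * (1 - z)   # argument of log() for the (1 - x) term
    L = -np.sum(x * np.log(z_safe_pos) + (1 - x) * np.log(z_safe_neg), axis=1)
    return L.mean()

x = np.array([[1.0, 0.0, 1.0]])
z = np.array([[1.0, 0.0, 0.5]])   # saturated sigmoid outputs
print(recon_cost(x, z))            # finite (~0.69); the unclamped formula yields NaN from 0*log(0)

The trade-off, as the comment in the diff itself notes, is that a reconstruction can never be assigned probability exactly 0 or 1.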