Mercurial > ift6266
diff deep/stacked_dae/stacked_dae.py @ 207:43af74a348ac
Merge branches from main repo.
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Thu, 04 Mar 2010 20:43:21 -0500 |
parents | e1f5f66dd7dd |
children | acb942530923 |
line wrap: on
line diff
--- a/deep/stacked_dae/stacked_dae.py Thu Mar 04 09:43:23 2010 -0500
+++ b/deep/stacked_dae/stacked_dae.py Thu Mar 04 20:43:21 2010 -0500
@@ -10,6 +10,15 @@
 
 from utils import update_locals
 
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
+
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
@@ -128,7 +137,16 @@
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         # L will be a vector, with one entry per example in minibatch
-        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+        # I added this epsilon to avoid getting log(0) and 1/0 in grad
+        # This means conceptually that there'd be no probability of 0, but that
+        # doesn't seem to me as important (maybe I'm wrong?).
+        eps = 0.00000001
+        eps_1 = 1-eps
+        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+                        + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         # of the reconstruction of the corresponding example of the
         # minibatch. We need to compute the average of all these to get
@@ -138,8 +156,6 @@
 
         self.params = [ self.W, self.b, self.b_prime ]
 
-
-
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
@@ -147,6 +163,7 @@
         # Just to make sure those are not modified somewhere else afterwards
         hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
         corruption_levels = copy.deepcopy(corruption_levels)
+
         update_locals(self, locals())
 
         self.layers = []
@@ -157,6 +174,17 @@
         self.all_params = []
         self.n_layers = len(hidden_layers_sizes)
 
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "input_divider", input_divider
+        print "----"
+
         self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :