ift6266: comparison of deep/stacked_dae/stacked_dae.py @ 192:e656edaedb48
Commented a few things, renamed the produit_croise_jobs function, replaced the cost function (NOT TESTED YET).
author | fsavard |
---|---|
date | Wed, 03 Mar 2010 12:51:40 -0500 |
parents | 3632e6258642 |
children | e1f5f66dd7dd |
191:3632e6258642 (parent) | 192:e656edaedb48 (this changeset) |
---|---|
7 import theano.tensor as T | 7 import theano.tensor as T |
8 from theano.tensor.shared_randomstreams import RandomStreams | 8 from theano.tensor.shared_randomstreams import RandomStreams |
9 import copy | 9 import copy |
10 | 10 |
11 from utils import update_locals | 11 from utils import update_locals |
| 12 |
| 13 # taken from LeDeepNet/daa.py |
| 14 # has a special case when taking log(0) (defined =0) |
| 15 # modified to not take the mean anymore |
| 16 from theano.tensor.xlogx import xlogx, xlogy0 |
| 17 # it's target*log(output) |
| 18 def binary_cross_entropy(target, output, sum_axis=1): |
| 19 XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) |
| 20 return -T.sum(XE, axis=sum_axis) |
12 | 21 |
13 class LogisticRegression(object): | 22 class LogisticRegression(object): |
14 def __init__(self, input, n_in, n_out): | 23 def __init__(self, input, n_in, n_out): |
15 # initialize with 0 the weights W as a matrix of shape (n_in, n_out) | 24 # initialize with 0 the weights W as a matrix of shape (n_in, n_out) |
16 self.W = theano.shared( value=numpy.zeros((n_in,n_out), | 25 self.W = theano.shared( value=numpy.zeros((n_in,n_out), |
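The chunk above adds a standalone cross-entropy helper: per its comments, xlogy0(x, y) computes x·log(y) with the 0·log(0) case defined as 0, and the sum is taken per example rather than averaged. A minimal NumPy sketch of the same computation (an illustration under those assumptions, not the Theano graph the file actually builds):

```python
import numpy as np

def xlogy0_np(x, y):
    # Mirrors the stated xlogy0 convention: x*log(y), with entries where
    # x == 0 defined as 0 even if y == 0 (so 0*log(0) contributes nothing).
    out = np.zeros_like(y, dtype=float)
    nz = x != 0
    out[nz] = x[nz] * np.log(y[nz])
    return out

def binary_cross_entropy_np(target, output, sum_axis=1):
    # One cross-entropy value per example (row); no mean over the minibatch.
    XE = xlogy0_np(target, output) + xlogy0_np(1 - target, 1 - output)
    return -np.sum(XE, axis=sum_axis)

# Hypothetical 2-example, 3-unit binary minibatch (illustration only).
target = np.array([[1., 0., 1.],
                   [0., 1., 0.]])
output = np.array([[0.9, 0.1, 0.8],
                   [0.2, 0.7, 0.0]])   # plain x*log(z) would give nan at the 0.0
print(binary_cross_entropy_np(target, output))   # one cost per example
```

Returning one cost per example leaves the reduction to the caller, presumably the point of "not take the mean anymore": the SdA code below still averages with T.mean(self.L).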
126 # Equation (3) | 135 # Equation (3) |
127 self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) | 136 self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) |
128 # Equation (4) | 137 # Equation (4) |
129 # note : we sum over the size of a datapoint; if we are using minibatches, | 138 # note : we sum over the size of a datapoint; if we are using minibatches, |
130 # L will be a vector, with one entry per example in minibatch | 139 # L will be a vector, with one entry per example in minibatch |
131 self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) | 140 #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) |
| 141 self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) |
132 # note : L is now a vector, where each element is the cross-entropy cost | 142 # note : L is now a vector, where each element is the cross-entropy cost |
133 # of the reconstruction of the corresponding example of the | 143 # of the reconstruction of the corresponding example of the |
134 # minibatch. We need to compute the average of all these to get | 144 # minibatch. We need to compute the average of all these to get |
135 # the cost of the minibatch | 145 # the cost of the minibatch |
136 self.cost = T.mean(self.L) | 146 self.cost = T.mean(self.L) |
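Functionally, the only change in this chunk is that the inline reconstruction cost (now commented out on new line 140) is routed through binary_cross_entropy. For reconstructions z strictly inside (0, 1) the two expressions agree exactly; they differ only when z saturates at 0 or 1, where the inline T.log produces nan/inf and xlogy0 zeroes out the 0·log(0) terms. A quick NumPy check of that equivalence, reusing the hypothetical binary_cross_entropy_np helper sketched above:

```python
import numpy as np

rng = np.random.RandomState(0)
x = (rng.rand(4, 6) > 0.5).astype(float)        # hypothetical binary input minibatch
z = np.clip(rng.rand(4, 6), 1e-6, 1.0 - 1e-6)   # reconstructions strictly in (0, 1)

old_L = -np.sum(x * np.log(z) + (1.0 - x) * np.log(1.0 - z), axis=1)
new_L = binary_cross_entropy_np(target=x, output=z, sum_axis=1)  # helper from the sketch above

assert np.allclose(old_L, new_L)
print(old_L.mean())   # corresponds to self.cost = T.mean(self.L)
```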
153 self.params = [] | 163 self.params = [] |
154 # MODIF: added this so we also get the b_primes | 164 # MODIF: added this so we also get the b_primes |
155 # (not used for finetuning... still using ".params") | 165 # (not used for finetuning... still using ".params") |
156 self.all_params = [] | 166 self.all_params = [] |
157 self.n_layers = len(hidden_layers_sizes) | 167 self.n_layers = len(hidden_layers_sizes) |
| 168 |
| 169 print "Creating SdA with params:" |
| 170 print "batch_size", batch_size |
| 171 print "hidden_layers_sizes", hidden_layers_sizes |
| 172 print "corruption_levels", corruption_levels |
| 173 print "n_ins", n_ins |
| 174 print "n_outs", n_outs |
| 175 print "pretrain_lr", pretrain_lr |
| 176 print "finetune_lr", finetune_lr |
| 177 print "input_divider", input_divider |
| 178 print "----" |
158 | 179 |
159 self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) | 180 self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) |
160 | 181 |
161 if len(hidden_layers_sizes) < 1 : | 182 if len(hidden_layers_sizes) < 1 : |
162 raiseException (' You must have at least one hidden layer ') | 183 raiseException (' You must have at least one hidden layer ') |
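One caveat about the last row: raiseException is not defined anywhere in this excerpt, so if the check ever fired it would fail with a NameError rather than the intended message. A minimal runnable equivalent (the exception type is an assumption; only the message comes from the changeset):

```python
# Presumed intent of the guard at the end of the SdA constructor.
if len(hidden_layers_sizes) < 1:
    raise ValueError("You must have at least one hidden layer")
```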