ift6266: comparison deep/stacked_dae/stacked_dae.py @ 207:43af74a348ac
Merge branches from main repo.
author   | Arnaud Bergeron <abergeron@gmail.com>
date     | Thu, 04 Mar 2010 20:43:21 -0500
parents  | e1f5f66dd7dd
children | acb942530923
--- deep/stacked_dae/stacked_dae.py  206:e12702b88a2d
+++ deep/stacked_dae/stacked_dae.py  207:43af74a348ac
@@ -7,10 +7,19 @@
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 import copy
 
 from utils import update_locals
+
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
 
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
         self.W = theano.shared( value=numpy.zeros((n_in,n_out),
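The helper added above relies on xlogy0, i.e. target*log(output) with the 0*log(0) case defined as 0, so a target that is exactly 0 or 1 does not turn the cross-entropy into NaN. A minimal NumPy sketch of that convention (illustrative only; xlogy0_np and binary_cross_entropy_np are not names from the repository):

import numpy as np

def xlogy0_np(x, y):
    # x * log(y), with the 0 * log(0) case defined as 0 (same convention as xlogy0).
    out = np.zeros_like(x, dtype=float)
    nz = x != 0
    out[nz] = x[nz] * np.log(y[nz])
    return out

def binary_cross_entropy_np(target, output, sum_axis=1):
    # Per-example cross-entropy, summed over features (no mean), mirroring the helper above.
    xe = xlogy0_np(target, output) + xlogy0_np(1 - target, 1 - output)
    return -xe.sum(axis=sum_axis)

t = np.array([[1.0, 0.0]])
o = np.array([[1.0, 0.0]])
print(binary_cross_entropy_np(t, o))   # [0.], whereas the naive formula gives NaN from 0*log(0)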
@@ -126,38 +135,57 @@
         # Equation (3)
         self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
-        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+        # I added this epsilon to avoid getting log(0) and 1/0 in grad
+        # This means conceptually that there'd be no probability of 0, but that
+        # doesn't seem to me as important (maybe I'm wrong?).
+        eps = 0.00000001
+        eps_1 = 1-eps
+        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+                          + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
         #        the cost of the minibatch
         self.cost = T.mean(self.L)
 
         self.params = [ self.W, self.b, self.b_prime ]
-
-
 
 
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
                  corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
         # Just to make sure those are not modified somewhere else afterwards
         hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
         corruption_levels = copy.deepcopy(corruption_levels)
+
         update_locals(self, locals())
 
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         # MODIF: added this so we also get the b_primes
         # (not used for finetuning... still using ".params")
         self.all_params = []
         self.n_layers = len(hidden_layers_sizes)
+
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "input_divider", input_divider
+        print "----"
 
         self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
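The replacement cost above squashes the reconstruction z from [0, 1] into [eps, 1 - eps] before taking logs, so neither log(0) nor a division by zero can appear when Theano differentiates the cost. A small NumPy sketch of the effect, assuming the same eps = 1e-8 (recon_cost is an illustrative name, not code from the changeset):

import numpy as np

def recon_cost(x, z, eps=1e-8):
    # Same clamping as in the changeset: z is pushed into [eps, 1 - eps],
    # so log() never sees 0 and the gradient terms 1/z and 1/(1 - z) stay finite.
    z_safe_pos = eps + (1 - eps) * z         # argument of log() for the x term
    z_safe_neg = eps + (1 - eps) * (1 - z)   # argument of log() for the (1 - x) term
    L = -np.sum(x * np.log(z_safe_pos) + (1 - x) * np.log(z_safe_neg), axis=1)
    return L.mean()

x = np.array([[1.0, 0.0, 1.0]])
z = np.array([[1.0, 0.0, 0.5]])   # saturated sigmoid outputs
print(recon_cost(x, z))            # finite (~0.69); the unclamped formula yields NaN from 0*log(0)

The trade-off, as the comment in the diff itself notes, is that a reconstruction can never be assigned probability exactly 0 or 1.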