comparison deep/stacked_dae/stacked_dae.py @ 207:43af74a348ac

Merge branches from main repo.
author Arnaud Bergeron <abergeron@gmail.com>
date Thu, 04 Mar 2010 20:43:21 -0500
parents e1f5f66dd7dd
children acb942530923

--- deep/stacked_dae/stacked_dae.py@206:e12702b88a2d
+++ deep/stacked_dae/stacked_dae.py@207:43af74a348ac
@@ -7,10 +7,19 @@
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 import copy
 
 from utils import update_locals
+
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
 
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
         self.W = theano.shared( value=numpy.zeros((n_in,n_out),
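
The xlogy0 helper imported above is what makes this cross-entropy defined when the
reconstruction is exactly 0: it computes target*log(output) but returns 0 wherever
target is 0, so the 0*log(0) case (NaN in floating point) contributes nothing. Below
is a minimal NumPy sketch of that convention, for illustration only; xlogy0_np is a
hypothetical stand-in, not the Theano op:

import numpy as np

def xlogy0_np(x, y):
    # Elementwise x*log(y), with the special case that the result is 0
    # wherever x == 0 (mirroring the "log(0) defined = 0" comment above).
    out = np.zeros_like(y, dtype=float)
    nz = x != 0
    out[nz] = x[nz] * np.log(y[nz])
    return out

target = np.array([[1.0, 0.0, 0.0]])
output = np.array([[0.9, 0.2, 0.0]])   # last unit reconstructs exactly 0
xe = xlogy0_np(target, output) + xlogy0_np(1 - target, 1 - output)
print(-np.sum(xe, axis=1))             # finite; target*log(output) alone would give NaN here
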
@@ -126,17 +135,24 @@
         # Equation (3)
         self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
-        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+        # I added this epsilon to avoid getting log(0) and 1/0 in grad
+        # This means conceptually that there'd be no probability of 0, but that
+        # doesn't seem to me as important (maybe I'm wrong?).
+        eps = 0.00000001
+        eps_1 = 1-eps
+        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+                          + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
         #        the cost of the minibatch
         self.cost = T.mean(self.L)
 
         self.params = [ self.W, self.b, self.b_prime ]
-
-
 
 
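
The epsilon introduced in the hunk above squashes the reconstruction z into [eps, 1]
before taking the log, so the cross-entropy and its gradient stay finite even when a
sigmoid unit saturates at exactly 0 or 1. A quick NumPy check of that claim (an
illustrative sketch, not part of the changeset):

import numpy as np

eps = 0.00000001            # same value as in the code above
x = np.array([1.0, 0.0])    # targets
z = np.array([0.0, 1.0])    # fully saturated, maximally wrong reconstructions

naive   = -np.sum(x*np.log(z) + (1-x)*np.log(1-z))        # inf (plus divide-by-zero warnings)
clamped = -np.sum(x*np.log(eps + (1-eps)*z)
                  + (1-x)*np.log(eps + (1-eps)*(1-z)))    # about 36.8 = -2*log(eps), finite
print(naive)
print(clamped)
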
@@ -143,21 +159,33 @@
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
                  corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
         # Just to make sure those are not modified somewhere else afterwards
         hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
         corruption_levels = copy.deepcopy(corruption_levels)
+
         update_locals(self, locals())
 
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         # MODIF: added this so we also get the b_primes
         # (not used for finetuning... still using ".params")
         self.all_params = []
         self.n_layers = len(hidden_layers_sizes)
+
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "input_divider", input_divider
+        print "----"
 
         self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :
             raise Exception(' You must have at least one hidden layer ')