denoising_aa.py @ 210:ffd50efefb70

work in progress denoising auto-encoder
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Sat, 17 May 2008 00:01:47 -0400
parents 209:50a8302addaf
children bd728c83faff
1 """
2 A denoising auto-encoder
3 """
4
5 import theano
6 from theano.formula import *
7 from learner import *
8 from theano import tensor as t
9 from nnet_ops import *
10 import math
11 from misc import *
12 from theano.tensor_random import binomial
13
def hiding_corruption_formula(seed, average_fraction_hidden):
    """
    Return a formula for the corruption process, in which a random
    subset of the input numbers are hidden (mapped to 0).

    @param seed: seed of the random generator
    @type seed: anything that numpy.random.RandomState accepts

    @param average_fraction_hidden: the probability with which each
    input number is hidden (set to 0).
    @type average_fraction_hidden: 0 <= real number <= 1
    """
    class HidingCorruptionFormula(Formulas):
        x = t.matrix()
        # each entry of x is kept with probability 1 - average_fraction_hidden
        corrupted_x = x * binomial(seed, x, 1, 1 - average_fraction_hidden)

    return HidingCorruptionFormula()

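# A minimal numpy sketch (not part of the formula API above; the helper
# name is hypothetical, for illustration only) of what the hiding
# corruption computes:
def _sketch_hiding_corruption(x, average_fraction_hidden, seed=0):
    import numpy
    rng = numpy.random.RandomState(seed)
    # keep each entry with probability 1 - average_fraction_hidden, zero it otherwise
    keep = rng.binomial(n=1, p=1.0 - average_fraction_hidden, size=x.shape)
    return x * keep
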
def squash_affine_formula(squash_function=sigmoid):
    """
    Return a formula computing a squashed affine transformation of its input.
    By convention, the parameters of the formula are prefixed with an underscore.
    """
    class SquashAffineFormula(Formulas):
        x = t.matrix()  # of dimensions minibatch_size x n_inputs
        _b = t.row()    # of dimensions 1 x n_outputs
        _W = t.matrix() # of dimensions n_inputs x n_outputs
        a = _b + t.dot(x, _W)  # of dimensions minibatch_size x n_outputs
        y = squash_function(a)
    return SquashAffineFormula()

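# Numpy sketch (hypothetical helper, for illustration) of what
# SquashAffineFormula computes with the default sigmoid squash:
def _sketch_squash_affine(x, W, b):
    import numpy
    a = b + numpy.dot(x, W)             # (minibatch_size, n_outputs)
    return 1.0 / (1.0 + numpy.exp(-a))  # sigmoid squash
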
def gradient_descent_update_formula():
    class GradientDescentUpdateFormula(Formulas):
        param = t.matrix()
        learning_rate = t.scalar()
        cost = t.column()  # cost of each example in a minibatch
        # t.sgrad(cost) denotes the gradient of the summed minibatch cost
        # with respect to param
        param_update = t.add_inplace(param, -learning_rate * t.sgrad(cost))
    return GradientDescentUpdateFormula()

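# Numpy sketch (hypothetical helper) of the update rule above:
# param <- param - learning_rate * d(cost)/d(param), applied in place.
def _sketch_sgd_update(param, grad, learning_rate):
    param -= learning_rate * grad  # in-place, like t.add_inplace above
    return param
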
def probabilistic_classifier_loss_formula():
    class ProbabilisticClassifierLossFormula(Formulas):
        a = t.matrix()             # of dimensions minibatch_size x n_classes, pre-softmax output
        target_class = t.ivector() # dimension (minibatch_size)
        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class)
    return ProbabilisticClassifierLossFormula()

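# Numpy sketch (hypothetical helper) of what crossentropy_softmax_1hot
# computes, per the comments above: the per-example negative log-likelihood
# of the target class under a row-wise softmax, plus the softmax itself.
def _sketch_crossentropy_softmax_1hot(a, target_class):
    import numpy
    a = a - a.max(axis=1, keepdims=True)  # shift for numerical stability
    p = numpy.exp(a)
    p = p / p.sum(axis=1, keepdims=True)  # row-wise softmax
    nll = -numpy.log(p[numpy.arange(a.shape[0]), target_class])
    return nll, p
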
def binomial_cross_entropy_formula():
    class BinomialCrossEntropyFormula(Formulas):
        a = t.matrix()  # pre-sigmoid activations, minibatch_size x dim
        p = sigmoid(a)  # model prediction
        q = t.matrix()  # target binomial probabilities, minibatch_size x dim
        # using the identity softplus(a) - softplus(-a) = a,
        # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
        nll = -t.sum(q * a - softplus(a))
    return BinomialCrossEntropyFormula()

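# Numeric check (plain numpy, hypothetical helper) of the softplus identity
# used above: with p = sigmoid(a),
# q*log(p) + (1-q)*log(1-p) == q*a - softplus(a).
def _check_binomial_xent_identity(a, q):
    import numpy
    p = 1.0 / (1.0 + numpy.exp(-a))                      # sigmoid
    lhs = q * numpy.log(p) + (1 - q) * numpy.log(1 - p)
    rhs = q * a - numpy.logaddexp(0, a)                  # q*a - softplus(a)
    return numpy.allclose(lhs, rhs)
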
def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
                                      reconstruction_squash=sigmoid,
                                      share_weights=True,
                                      reconstruction_nll_formula=binomial_cross_entropy_formula(),
                                      update_formula=gradient_descent_update_formula):
    # the encoder's output y is renamed 'hidden' so that it feeds the
    # reconstruction formula, whose input x is renamed 'hidden'
    if share_weights:
        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a', y='hidden') + \
                      squash_affine_formula(reconstruction_squash).rename(x='hidden', y='reconstruction', _b='_c') + \
                      reconstruction_nll_formula
    else:
        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a', y='hidden', _W='_W1') + \
                      squash_affine_formula(reconstruction_squash).rename(x='hidden', y='reconstruction', _b='_c', _W='_W2') + \
                      reconstruction_nll_formula
    # add one gradient-descent update formula per parameter (names prefixed by _)
    autoencoder = autoencoder + [update_formula().rename(cost='nll', param=p)
                                 for p in autoencoder.get_all('_.*')]
    return autoencoder

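# Numpy sketch (hypothetical helper) of the forward pass assembled above
# in the shared-weights case: the decoder reuses the encoder weights,
# transposed.
def _sketch_tied_autoencoder_forward(x, W, b, c):
    import numpy
    hidden = numpy.tanh(b + numpy.dot(x, W))   # encoder, W: n_inputs x n_hidden
    recon_a = c + numpy.dot(hidden, W.T)       # decoder with tied (shared) weights
    return 1.0 / (1.0 + numpy.exp(-recon_a))   # sigmoid reconstruction
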
# @todo: try other corruption formulae. The above is the default one, but it
# is not quite the one used in the ICML paper (which hid a fixed number of inputs).

class DenoisingAutoEncoder(LearningAlgorithm):

    def __init__(self, n_inputs, n_hidden_per_layer,
                 learning_rate=0.1,
                 max_n_epochs=100,
                 L1_regularizer=0,
                 init_range=1.,
                 # hiding_corruption_formula requires a seed and a hiding
                 # probability; the values below are arbitrary placeholders
                 corruption_formula=hiding_corruption_formula(seed=1827,
                                                              average_fraction_hidden=0.2),
                 autoencoder=squash_affine_autoencoder_formula(),
                 minibatch_size=None, linker="c|py"):
        # store all constructor arguments as attributes of the same name
        for name, val in locals().items():
            if val is not self:
                setattr(self, name, val)
        self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')

    def __call__(self, training_set=None):
        model = DenoisingAutoEncoderModel(self)
        if training_set:
            # train on the provided data, then return the model
            model.update(training_set)
        return model

    def compile(self, inputs, outputs):
        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)

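# Hypothetical usage sketch (training_set comes from the learner framework,
# not from this module; the sizes are placeholders):
#
#   learner = DenoisingAutoEncoder(n_inputs=784, n_hidden_per_layer=500)
#   model = learner(training_set)   # trains the model if a training set is given
#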
class DenoisingAutoEncoderModel(LearnerModel):
    # params defaults to None because __call__ above constructs the model
    # before any training has happened
    def __init__(self, learning_algorithm, params=None):
        self.learning_algorithm = learning_algorithm
        self.params = params
        self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs,
                                                    learning_algorithm.denoising_autoencoder_formula.outputs)

    def update(self, training_set, train_stats_collector=None):
        # @todo: loop over minibatches of training_set and apply update_fn
        pass

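# Numpy sketch (hypothetical helper; not the WIP formula machinery above)
# of what one denoising training step on a minibatch x is meant to compute,
# with tied weights W (n_inputs x n_hidden), hidden bias b and
# reconstruction bias c:
def _sketch_denoising_training_step(x, W, b, c, learning_rate=0.1,
                                    average_fraction_hidden=0.2, seed=0):
    import numpy
    rng = numpy.random.RandomState(seed)
    # 1) corrupt the input by hiding a random subset of its entries
    x_tilde = x * rng.binomial(1, 1.0 - average_fraction_hidden, size=x.shape)
    # 2) encode the corrupted input, decode with the transposed weights
    h = numpy.tanh(b + numpy.dot(x_tilde, W))
    a = c + numpy.dot(h, W.T)                   # pre-sigmoid reconstruction
    p = 1.0 / (1.0 + numpy.exp(-a))
    # 3) binomial cross-entropy against the *clean* input x
    nll = -numpy.sum(x * a - numpy.logaddexp(0, a))
    # 4) backpropagate and take one gradient-descent step
    d_a = p - x                                 # d nll / d a
    d_h = numpy.dot(d_a, W) * (1 - h ** 2)      # through the tanh
    W -= learning_rate * (numpy.dot(x_tilde.T, d_h) + numpy.dot(d_a.T, h))
    b -= learning_rate * d_h.sum(axis=0)
    c -= learning_rate * d_a.sum(axis=0)
    return nll
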
# old stuff

#         self._learning_rate = t.scalar('learning_rate') # this is the symbol
#         self.L1_regularizer = L1_regularizer
#         self._L1_regularizer = t.scalar('L1_regularizer')
#         self._input = t.matrix('input') # n_examples x n_inputs
#         self._W = t.matrix('W')
#         self._b = t.row('b')
#         self._c = t.row('c')
#         self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W))
#         self._corrupted_input = corruption_process(self._input)
#         self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T))
#         self._reconstruction_activations = self._c + t.dot(self._hidden, self._W)
#         self._nll, self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations), self._target_vector)
#         self._output_class = t.argmax(self._output, 1)
#         self._class_error = t.neq(self._output_class, self._target_vector)
#         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
#         OnlineGradientTLearner.__init__(self)

#     def attributeNames(self):
#         return ["parameters", "b1", "W1", "b2", "W2", "L1_regularizer", "regularization_term"]

#     def parameterAttributes(self):
#         return ["b1", "W1", "b2", "W2"]

#     def updateMinibatchInputFields(self):
#         return ["input", "target"]

#     def updateEndOutputAttributes(self):
#         return ["regularization_term"]

#     def lossAttribute(self):
#         return "minibatch_criterion"

#     def defaultOutputFields(self, input_fields):
#         output_fields = ["output", "output_class"]
#         if "target" in input_fields:
#             output_fields += ["class_error", "nll"]
#         return output_fields

#     def allocate(self, minibatch):
#         minibatch_n_inputs = minibatch["input"].shape[1]
#         if not self._n_inputs:
#             self._n_inputs = minibatch_n_inputs
#             self.b1 = numpy.zeros((1, self._n_hidden))
#             self.b2 = numpy.zeros((1, self._n_outputs))
#             self.forget()
#         elif self._n_inputs != minibatch_n_inputs:
#             # if the input changes dimension on the fly, we resize and forget everything
#             self.forget()

#     def forget(self):
#         if self._n_inputs:
#             r = self._init_range / math.sqrt(self._n_inputs)
#             self.W1 = numpy.random.uniform(low=-r, high=r,
#                                            size=(self._n_hidden, self._n_inputs))
#             r = self._init_range / math.sqrt(self._n_hidden)
#             self.W2 = numpy.random.uniform(low=-r, high=r,
#                                            size=(self._n_outputs, self._n_hidden))
#             self.b1[:] = 0
#             self.b2[:] = 0
#             self._n_epochs = 0

#     def isLastEpoch(self):
#         self._n_epochs += 1
#         return self._n_epochs >= self._max_n_epochs