pylearn: comparison of denoising_aa.py @ 210:ffd50efefb70

work in progress denoising auto-encoder

author:   Yoshua Bengio <bengioy@iro.umontreal.ca>
date:     Sat, 17 May 2008 00:01:47 -0400
parents:
children: bd728c83faff
comparing 209:50a8302addaf with 210:ffd50efefb70

"""
A denoising auto-encoder
"""

import theano
from theano.formula import *
from learner import *
from theano import tensor as t
from nnet_ops import *
import math
from misc import *
from theano.tensor_random import binomial

# NOTE: the default argument values are arbitrary placeholders, so that
# hiding_corruption_formula() can be called with no arguments, as is done
# in DenoisingAutoEncoder.__init__ below.
def hiding_corruption_formula(seed=0, average_fraction_hidden=0.2):
    """
    Return a formula for the corruption process, in which a random
    subset of the input numbers are hidden (mapped to 0).

    @param seed: seed of the random generator
    @type seed: anything that numpy.random.RandomState accepts

    @param average_fraction_hidden: the probability with which each
                                    input number is hidden (set to 0).
    @type average_fraction_hidden: 0 <= real number <= 1
    """
    class HidingCorruptionFormula(Formulas):
        x = t.matrix()
        # keep each entry with probability 1-average_fraction_hidden,
        # hide it (set it to 0) with probability average_fraction_hidden
        corrupted_x = x * binomial(seed,x,1,1-average_fraction_hidden)

    return HidingCorruptionFormula()
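
# A minimal numpy sketch (illustration only, not part of the formula API)
# of what the corruption above computes: each entry of x is independently
# kept with probability 1-average_fraction_hidden and zeroed otherwise.
def _demo_hiding_corruption(x, average_fraction_hidden=0.2, seed=0):
    import numpy
    rng = numpy.random.RandomState(seed)
    keep = rng.binomial(1, 1-average_fraction_hidden, size=x.shape) # 1=keep, 0=hide
    return x * keep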

def squash_affine_formula(squash_function=sigmoid):
    """
    Formula for an affine transformation followed by a squashing
    function. By convention, the formula's parameters (as opposed to
    its inputs) are prefixed by _.
    """
    class SquashAffineFormula(Formulas):
        x = t.matrix()  # of dimensions minibatch_size x n_inputs
        _b = t.row()    # of dimensions 1 x n_outputs
        _W = t.matrix() # of dimensions n_inputs x n_outputs
        a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs
        y = squash_function(a)
    return SquashAffineFormula()
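
# A minimal numpy sketch (illustration only) of the computation the
# formula above describes, using the default sigmoid squash:
def _demo_squash_affine(x, W, b):
    import numpy
    a = b + numpy.dot(x, W)           # minibatch_size x n_outputs
    return 1. / (1. + numpy.exp(-a))  # sigmoid(a)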

def gradient_descent_update_formula():
    class GradientDescentUpdateFormula(Formulas):
        param = t.matrix()
        learning_rate = t.scalar()
        cost = t.column() # cost of each example in a minibatch
        # in-place update: param <- param - learning_rate * gradient of cost
        param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost))
    return GradientDescentUpdateFormula()

def probabilistic_classifier_loss_formula():
    class ProbabilisticClassifierLossFormula(Formulas):
        a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output
        target_class = t.ivector() # dimension (minibatch_size)
        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class)
    return ProbabilisticClassifierLossFormula()

def binomial_cross_entropy_formula():
    class BinomialCrossEntropyFormula(Formulas):
        a = t.matrix() # pre-sigmoid activations, minibatch_size x dim
        p = sigmoid(a) # model prediction
        q = t.matrix() # target binomial probabilities, minibatch_size x dim
        # using the identity softplus(a) - softplus(-a) = a,
        # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
        nll = -t.sum(q*a - softplus(a))
    return BinomialCrossEntropyFormula()

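# A small numpy check (illustration only) of the identity used above:
# with p = sigmoid(a), q*log(p) + (1-q)*log(1-p) equals q*a - softplus(a).
def _check_binomial_cross_entropy_identity(a=1.3, q=0.7):
    import numpy
    softplus = lambda z: numpy.log1p(numpy.exp(z))
    p = 1. / (1. + numpy.exp(-a))
    lhs = q*numpy.log(p) + (1-q)*numpy.log(1-p)
    rhs = q*a - softplus(a)
    assert numpy.allclose(lhs, rhs)
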
def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
                                      reconstruction_squash=sigmoid,
                                      share_weights=True,
                                      reconstruction_nll_formula=binomial_cross_entropy_formula(),
                                      update_formula=gradient_descent_update_formula):
    # rename the encoder's output y to 'hidden' so that it feeds the
    # reconstruction layer, whose input x is renamed to 'hidden'
    if share_weights:
        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',y='hidden') + \
                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \
                      reconstruction_nll_formula
    else:
        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',y='hidden',_W='_W1') + \
                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \
                      reconstruction_nll_formula
    # attach a gradient-descent update to every parameter (names prefixed by _)
    autoencoder = autoencoder + [update_formula().rename(cost='nll', param=p)
                                 for p in autoencoder.get_all('_.*')]
    return autoencoder
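
# A minimal numpy sketch (illustration only) of the forward pass described
# by the composed formulas in the share_weights=True case, where the
# reconstruction layer reuses the transpose of the encoding weights:
def _demo_tied_autoencoder_forward(x, W, b, c):
    import numpy
    hidden = numpy.tanh(b + numpy.dot(x, W))    # encode: minibatch x n_hidden
    a = c + numpy.dot(hidden, W.T)              # decode with tied weights
    reconstruction = 1. / (1. + numpy.exp(-a))  # sigmoid reconstruction
    return hidden, reconstruction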


# @todo: try other corruption formulae. The one above is the default;
# it is not quite the one used in the ICML paper (which set a fixed
# number of inputs to 0 rather than an independent random fraction).

class DenoisingAutoEncoder(LearningAlgorithm):

    def __init__(self,n_inputs,n_hidden_per_layer,
                 learning_rate=0.1,
                 max_n_epochs=100,
                 L1_regularizer=0,
                 init_range=1.,
                 corruption_formula = hiding_corruption_formula(),
                 autoencoder = squash_affine_autoencoder_formula(),
                 minibatch_size=None,linker = "c|py"):
        # store all constructor arguments as attributes of the same name
        for name,val in locals().items():
            if val is not self: setattr(self,name,val)
        self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')

    def __call__(self, training_set=None):
        # TODO: allocate the model's parameters and pass them here
        model = DenoisingAutoEncoderModel(self)
        if training_set:
            pass # TODO: train the model on training_set
        return model

    def compile(self, inputs, outputs):
        return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)

class DenoisingAutoEncoderModel(LearnerModel):
    def __init__(self,learning_algorithm,params):
        self.learning_algorithm=learning_algorithm
        self.params=params
        v = learning_algorithm.v
        self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs,
                                                    learning_algorithm.denoising_autoencoder_formula.outputs)

    def update(self, training_set, train_stats_collector=None):
        pass # TODO: iterate over minibatches of training_set and apply self.update_fn

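
# A minimal numpy sketch (illustration only, with assumed tied weights and
# hypothetical names) of the single training step that update() is meant
# to perform: corrupt the input, encode, decode, and take a gradient step
# on the reconstruction negative log-likelihood.
def _demo_denoising_step(x, W, b, c, learning_rate=0.1, seed=0):
    import numpy
    rng = numpy.random.RandomState(seed)
    x_c = x * rng.binomial(1, 0.8, size=x.shape) # hide ~20% of the inputs
    h = numpy.tanh(b + numpy.dot(x_c, W))        # encode
    a = c + numpy.dot(h, W.T)                    # decode (tied weights)
    p = 1. / (1. + numpy.exp(-a))                # sigmoid reconstruction
    # nll = -sum(x*a - softplus(a)), so dnll/da = sigmoid(a) - x = p - x
    d_a = p - x
    d_h = numpy.dot(d_a, W) * (1 - h**2)         # backprop through tanh
    W -= learning_rate * (numpy.dot(x_c.T, d_h) + numpy.dot(d_a.T, h))
    b -= learning_rate * d_h.sum(axis=0)
    c -= learning_rate * d_a.sum(axis=0)
    return W, b, c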
# old stuff

# self._learning_rate = t.scalar('learning_rate') # this is the symbol
# self.L1_regularizer = L1_regularizer
# self._L1_regularizer = t.scalar('L1_regularizer')
# self._input = t.matrix('input') # n_examples x n_inputs
# self._W = t.matrix('W')
# self._b = t.row('b')
# self._c = t.row('c')
# self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W))
# self._corrupted_input = corruption_process(self._input)
# self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T))
# self._reconstruction_activations = self._c + t.dot(self._hidden, self._W)
# self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector)
# self._output_class = t.argmax(self._output,1)
# self._class_error = t.neq(self._output_class,self._target_vector)
# self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
# OnlineGradientTLearner.__init__(self)

# def attributeNames(self):
#     return ["parameters","b1","W1","b2","W2", "L2_regularizer","regularization_term"]

# def parameterAttributes(self):
#     return ["b1","W1", "b2", "W2"]

# def updateMinibatchInputFields(self):
#     return ["input","target"]

# def updateEndOutputAttributes(self):
#     return ["regularization_term"]

# def lossAttribute(self):
#     return "minibatch_criterion"

# def defaultOutputFields(self, input_fields):
#     output_fields = ["output", "output_class",]
#     if "target" in input_fields:
#         output_fields += ["class_error", "nll"]
#     return output_fields

# def allocate(self,minibatch):
#     minibatch_n_inputs = minibatch["input"].shape[1]
#     if not self._n_inputs:
#         self._n_inputs = minibatch_n_inputs
#         self.b1 = numpy.zeros((1,self._n_hidden))
#         self.b2 = numpy.zeros((1,self._n_outputs))
#         self.forget()
#     elif self._n_inputs!=minibatch_n_inputs:
#         # if the input changes dimension on the fly, we resize and forget everything
#         self.forget()

# def forget(self):
#     if self._n_inputs:
#         r = self._init_range/math.sqrt(self._n_inputs)
#         self.W1 = numpy.random.uniform(low=-r,high=r,
#                                        size=(self._n_hidden,self._n_inputs))
#         r = self._init_range/math.sqrt(self._n_hidden)
#         self.W2 = numpy.random.uniform(low=-r,high=r,
#                                        size=(self._n_outputs,self._n_hidden))
#         self.b1[:]=0
#         self.b2[:]=0
#         self._n_epochs=0

# def isLastEpoch(self):
#     self._n_epochs += 1
#     return self._n_epochs>=self._max_n_epochs