pylearn: changeset 418:2ea14774eb07

Automated merge with ssh://projects@lgcm.iro.umontreal.ca/hg/pylearn

author      Frederic Bastien <bastienf@iro.umontreal.ca>
date        Mon, 14 Jul 2008 13:48:41 -0400
parents     4f61201fa9a9 (diff) 5175c564e37a (current diff)
children    43d9aa93934e
diffstat    11 files changed, 316 insertions(+), 288 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cost.py	Mon Jul 14 13:48:41 2008 -0400
@@ -0,0 +1,11 @@
+"""
+Cost functions.
+"""
+
+import theano.tensor as T
+
+def quadratic(target, output, axis=1):
+    return T.mean(T.sqr(target - output), axis)
+
+def cross_entropy(target, output, axis=1):
+    return -T.mean(target * T.log2(output) + (1 - target) * T.log2(1 - output), axis=axis)
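For reference, a plain NumPy sketch of the same two costs (my own illustration, not part of the changeset); note that cross_entropy above uses log base 2, so the result is in bits:

    import numpy as np

    def quadratic_np(target, output, axis=1):
        # mean squared error per example, averaged over `axis`
        return np.mean(np.square(target - output), axis=axis)

    def cross_entropy_np(target, output, axis=1):
        # binary cross-entropy in bits, mirroring cost.py
        return -np.mean(target * np.log2(output)
                        + (1 - target) * np.log2(1 - output), axis=axis)

    x = np.array([[1.0, 0.0, 1.0]])
    y = np.array([[0.9, 0.2, 0.8]])
    print quadratic_np(x, y), cross_entropy_np(x, y)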
--- a/denoising_aa.py	Mon Jul 14 13:48:36 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-"""
-A denoising auto-encoder
-"""
-
-import theano
-from theano.formula import *
-from learner import *
-from theano import tensor as t
-from nnet_ops import *
-import math
-from misc import *
-from misc_theano import *
-from theano.tensor_random import binomial
-
-def hiding_corruption_formula(seed,average_fraction_hidden):
-    """
-    Return a formula for the corruption process, in which a random
-    subset of the input numbers are hidden (mapped to 0).
-
-    @param seed: seed of the random generator
-    @type seed: anything that numpy.random.RandomState accepts
-
-    @param average_fraction_hidden: the probability with which each
-                                    input number is hidden (set to 0).
-    @type average_fraction_hidden: 0 <= real number <= 1
-    """
-    class HidingCorruptionFormula(Formulas):
-        x = t.matrix()
-        corrupted_x = x * binomial(seed,x,1,fraction_sampled)
-
-    return HidingCorruptionFormula()
-
-def squash_affine_formula(squash_function=sigmoid):
-    """
-    Simply does: squash_function(b + xW)
-    By convention prefix the parameters by _
-    """
-    class SquashAffineFormula(Formulas):
-        x = t.matrix() # of dimensions minibatch_size x n_inputs
-        _b = t.row() # of dimensions 1 x n_outputs
-        _W = t.matrix() # of dimensions n_inputs x n_outputs
-        a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs
-        y = squash_function(a)
-    return SquashAffineFormula()
-
-def gradient_descent_update_formula():
-    class GradientDescentUpdateFormula(Formula):
-        param = t.matrix()
-        learning_rate = t.scalar()
-        cost = t.column() # cost of each example in a minibatch
-        param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost))
-    return gradient_descent_update_formula()
-
-def probabilistic_classifier_loss_formula():
-    class ProbabilisticClassifierLossFormula(Formulas):
-        a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output
-        target_class = t.ivector() # dimension (minibatch_size)
-        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py
-    return ProbabilisticClassifierLossFormula()
-
-def binomial_cross_entropy_formula():
-    class BinomialCrossEntropyFormula(Formulas):
-        a = t.matrix() # pre-sigmoid activations, minibatch_size x dim
-        p = sigmoid(a) # model prediction
-        q = t.matrix() # target binomial probabilities, minibatch_size x dim
-        # using the identity softplus(a) - softplus(-a) = a,
-        # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
-        nll = -t.sum(q*a - softplus(-a))
-        # next line was missing... hope it's all correct above
-    return BinomialCrossEntropyFormula()
-
-def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
-                                      reconstruction_squash=sigmoid,
-                                      share_weights=True,
-                                      reconstruction_nll_formula=binomial_cross_entropy_formula(),
-                                      update_formula=gradient_descent_update_formula):
-    if share_weights:
-        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a') + \
-                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \
-                      reconstruction_nll_formula
-    else:
-        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',_W='_W1') + \
-                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \
-                      reconstruction_nll_formula
-    autoencoder = autoencoder + [update_formula().rename(cost = 'nll',
-                                                         param = p)
-                                 for p in autoencoder.get_all('_.*')]
-    return autoencoder
-
-
-# @todo: try other corruption formulae. The above is the default one.
-# not quite used in the ICML paper... (had a fixed number of 0s).
-
-class DenoisingAutoEncoder(LearningAlgorithm):
-
-    def __init__(self,n_inputs,n_hidden_per_layer,
-                 learning_rate=0.1,
-                 max_n_epochs=100,
-                 L1_regularizer=0,
-                 init_range=1.,
-                 corruption_formula = hiding_corruption_formula(),
-                 autoencoder = squash_affine_autoencoder_formula(),
-                 minibatch_size=None,linker = "c|py"):
-        for name,val in locals().items():
-            if val is not self: self.__setattribute__(name,val)
-        self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')
-
-    def __call__(self, training_set=None):
-        """ Allocate and optionnaly train a model
-
-        @TODO enables passing in training and valid sets, instead of cutting one set in 80/20
-        """
-        model = DenoisingAutoEncoderModel(self)
-        if training_set:
-            print 'DenoisingAutoEncoder(): what do I do if training_set????'
-            # copied from old mlp_factory_approach:
-            if len(trainset) == sys.maxint:
-                raise NotImplementedError('Learning from infinite streams is not supported')
-            nval = int(self.validation_portion * len(trainset))
-            nmin = len(trainset) - nval
-            assert nmin >= 0
-            minset = trainset[:nmin] #real training set for minimizing loss
-            valset = trainset[nmin:] #validation set for early stopping
-            best = model
-            for stp in self.early_stopper():
-                model.update(
-                    minset.minibatches([input, target], minibatch_size=min(32,
-                        len(trainset))))
-                #print 'mlp.__call__(), we did an update'
-                if stp.set_score:
-                    stp.score = model(valset, ['loss_01'])
-                    if (stp.score < stp.best_score):
-                        best = copy.copy(model)
-            model = best
-            # end of the copy from mlp_factory_approach
-
-        return model
-
-
-    def compile(self, inputs, outputs):
-        return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)
-
-class DenoisingAutoEncoderModel(LearnerModel):
-    def __init__(self,learning_algorithm,params):
-        self.learning_algorithm=learning_algorithm
-        self.params=params
-        v = learning_algorithm.v
-        self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs,
-                                                    learning_algorithm.denoising_autoencoder_formula.outputs)
-
-    def update(self, training_set, train_stats_collector=None):
-
-        print 'dont update you crazy frog!'
-
-
-# old stuff
-
-#         self._learning_rate = t.scalar('learning_rate') # this is the symbol
-#         self.L1_regularizer = L1_regularizer
-#         self._L1_regularizer = t.scalar('L1_regularizer')
-#         self._input = t.matrix('input') # n_examples x n_inputs
-#         self._W = t.matrix('W')
-#         self._b = t.row('b')
-#         self._c = t.row('b')
-#         self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W))
-#         self._corrupted_input = corruption_process(self._input)
-#         self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T))
-#         self._reconstruction_activations =self._c+t.dot(self._hidden,self._W)
-#         self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector)
-#         self._output_class = t.argmax(self._output,1)
-#         self._class_error = t.neq(self._output_class,self._target_vector)
-#         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
-#         OnlineGradientTLearner.__init__(self)
-
-#     def attributeNames(self):
-#         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
-
-#     def parameterAttributes(self):
-#         return ["b1","W1", "b2", "W2"]
-
-#     def updateMinibatchInputFields(self):
-#         return ["input","target"]
-
-#     def updateEndOutputAttributes(self):
-#         return ["regularization_term"]
-
-#     def lossAttribute(self):
-#         return "minibatch_criterion"
-
-#     def defaultOutputFields(self, input_fields):
-#         output_fields = ["output", "output_class",]
-#         if "target" in input_fields:
-#             output_fields += ["class_error", "nll"]
-#         return output_fields
-
-#     def allocate(self,minibatch):
-#         minibatch_n_inputs = minibatch["input"].shape[1]
-#         if not self._n_inputs:
-#             self._n_inputs = minibatch_n_inputs
-#             self.b1 = numpy.zeros((1,self._n_hidden))
-#             self.b2 = numpy.zeros((1,self._n_outputs))
-#             self.forget()
-#         elif self._n_inputs!=minibatch_n_inputs:
-#             # if the input changes dimension on the fly, we resize and forget everything
-#             self.forget()
-
-#     def forget(self):
-#         if self._n_inputs:
-#             r = self._init_range/math.sqrt(self._n_inputs)
-#             self.W1 = numpy.random.uniform(low=-r,high=r,
-#                                            size=(self._n_hidden,self._n_inputs))
-#             r = self._init_range/math.sqrt(self._n_hidden)
-#             self.W2 = numpy.random.uniform(low=-r,high=r,
-#                                            size=(self._n_outputs,self._n_hidden))
-#             self.b1[:]=0
-#             self.b2[:]=0
-#             self._n_epochs=0
-
-#     def isLastEpoch(self):
-#         self._n_epochs +=1
-#         return self._n_epochs>=self._max_n_epochs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/noise.py	Mon Jul 14 13:48:41 2008 -0400
@@ -0,0 +1,13 @@
+def binomial(input, rstate, p = 0.75):
+    """
+    Op to corrupt an input with binomial noise.
+    Generate a noise vector of 1's and 0's (1 with probability p).
+    We multiply this by the input.
+
+    @note: See U{ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa}
+    to see how rstate is used.
+    """
+    noise = rstate.gen_like(('binomial',{'p': p, 'n': 1}), input)
+    noise.name = 'noise'
+    return noise * input
+
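The same masking corruption can be sketched with plain NumPy (an illustration of the idea only, not the project's rstate API): each entry survives with probability p and is zeroed otherwise.

    import numpy as np

    def binomial_mask(x, rng, p=0.75):
        # keep each entry with probability p, zero it out otherwise
        mask = rng.binomial(n=1, p=p, size=x.shape)
        return mask * x

    rng = np.random.RandomState(0)
    x = np.arange(6, dtype=float).reshape(2, 3)
    print binomial_mask(x, rng)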
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/denoising_aa.py	Mon Jul 14 13:48:41 2008 -0400
@@ -0,0 +1,224 @@
+"""
+A denoising auto-encoder
+
+@warning: You should use this interface. It is not complete and is not functional.
+Instead, use::
+    ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa
+"""
+
+import theano
+from theano.formula import *
+from learner import *
+from theano import tensor as t
+from nnet_ops import *
+import math
+from misc import *
+from misc_theano import *
+from theano.tensor_random import binomial
+
+def hiding_corruption_formula(seed,average_fraction_hidden):
+    """
+    Return a formula for the corruption process, in which a random
+    subset of the input numbers are hidden (mapped to 0).
+
+    @param seed: seed of the random generator
+    @type seed: anything that numpy.random.RandomState accepts
+
+    @param average_fraction_hidden: the probability with which each
+                                    input number is hidden (set to 0).
+    @type average_fraction_hidden: 0 <= real number <= 1
+    """
+    class HidingCorruptionFormula(Formulas):
+        x = t.matrix()
+        corrupted_x = x * binomial(seed,x,1,fraction_sampled)
+
+    return HidingCorruptionFormula()
+
+def squash_affine_formula(squash_function=sigmoid):
+    """
+    Simply does: squash_function(b + xW)
+    By convention prefix the parameters by _
+    """
+    class SquashAffineFormula(Formulas):
+        x = t.matrix() # of dimensions minibatch_size x n_inputs
+        _b = t.row() # of dimensions 1 x n_outputs
+        _W = t.matrix() # of dimensions n_inputs x n_outputs
+        a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs
+        y = squash_function(a)
+    return SquashAffineFormula()
+
+def gradient_descent_update_formula():
+    class GradientDescentUpdateFormula(Formula):
+        param = t.matrix()
+        learning_rate = t.scalar()
+        cost = t.column() # cost of each example in a minibatch
+        param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost))
+    return gradient_descent_update_formula()
+
+def probabilistic_classifier_loss_formula():
+    class ProbabilisticClassifierLossFormula(Formulas):
+        a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output
+        target_class = t.ivector() # dimension (minibatch_size)
+        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py
+    return ProbabilisticClassifierLossFormula()
+
+def binomial_cross_entropy_formula():
+    class BinomialCrossEntropyFormula(Formulas):
+        a = t.matrix() # pre-sigmoid activations, minibatch_size x dim
+        p = sigmoid(a) # model prediction
+        q = t.matrix() # target binomial probabilities, minibatch_size x dim
+        # using the identity softplus(a) - softplus(-a) = a,
+        # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
+        nll = -t.sum(q*a - softplus(-a))
+        # next line was missing... hope it's all correct above
+    return BinomialCrossEntropyFormula()
+
+def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
+                                      reconstruction_squash=sigmoid,
+                                      share_weights=True,
+                                      reconstruction_nll_formula=binomial_cross_entropy_formula(),
+                                      update_formula=gradient_descent_update_formula):
+    if share_weights:
+        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a') + \
+                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \
+                      reconstruction_nll_formula
+    else:
+        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',_W='_W1') + \
+                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \
+                      reconstruction_nll_formula
+    autoencoder = autoencoder + [update_formula().rename(cost = 'nll',
+                                                         param = p)
+                                 for p in autoencoder.get_all('_.*')]
+    return autoencoder
+
+
+# @todo: try other corruption formulae. The above is the default one.
+# not quite used in the ICML paper... (had a fixed number of 0s).
+
+class DenoisingAutoEncoder(LearningAlgorithm):
+
+    def __init__(self,n_inputs,n_hidden_per_layer,
+                 learning_rate=0.1,
+                 max_n_epochs=100,
+                 L1_regularizer=0,
+                 init_range=1.,
+                 corruption_formula = hiding_corruption_formula(),
+                 autoencoder = squash_affine_autoencoder_formula(),
+                 minibatch_size=None,linker = "c|py"):
+        for name,val in locals().items():
+            if val is not self: self.__setattribute__(name,val)
+        self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')
+
+    def __call__(self, training_set=None):
+        """ Allocate and optionnaly train a model
+
+        @TODO enables passing in training and valid sets, instead of cutting one set in 80/20
+        """
+        model = DenoisingAutoEncoderModel(self)
+        if training_set:
+            print 'DenoisingAutoEncoder(): what do I do if training_set????'
+            # copied from old mlp_factory_approach:
+            if len(trainset) == sys.maxint:
+                raise NotImplementedError('Learning from infinite streams is not supported')
+            nval = int(self.validation_portion * len(trainset))
+            nmin = len(trainset) - nval
+            assert nmin >= 0
+            minset = trainset[:nmin] #real training set for minimizing loss
+            valset = trainset[nmin:] #validation set for early stopping
+            best = model
+            for stp in self.early_stopper():
+                model.update(
+                    minset.minibatches([input, target], minibatch_size=min(32,
+                        len(trainset))))
+                #print 'mlp.__call__(), we did an update'
+                if stp.set_score:
+                    stp.score = model(valset, ['loss_01'])
+                    if (stp.score < stp.best_score):
+                        best = copy.copy(model)
+            model = best
+            # end of the copy from mlp_factory_approach
+
+        return model
+
+
+    def compile(self, inputs, outputs):
+        return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)
+
+class DenoisingAutoEncoderModel(LearnerModel):
+    def __init__(self,learning_algorithm,params):
+        self.learning_algorithm=learning_algorithm
+        self.params=params
+        v = learning_algorithm.v
+        self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs,
+                                                    learning_algorithm.denoising_autoencoder_formula.outputs)
+
+    def update(self, training_set, train_stats_collector=None):
+
+        print 'dont update you crazy frog!'
+
+
+# old stuff
+
+#         self._learning_rate = t.scalar('learning_rate') # this is the symbol
+#         self.L1_regularizer = L1_regularizer
+#         self._L1_regularizer = t.scalar('L1_regularizer')
+#         self._input = t.matrix('input') # n_examples x n_inputs
+#         self._W = t.matrix('W')
+#         self._b = t.row('b')
+#         self._c = t.row('b')
+#         self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W))
+#         self._corrupted_input = corruption_process(self._input)
+#         self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T))
+#         self._reconstruction_activations =self._c+t.dot(self._hidden,self._W)
+#         self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector)
+#         self._output_class = t.argmax(self._output,1)
+#         self._class_error = t.neq(self._output_class,self._target_vector)
+#         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
+#         OnlineGradientTLearner.__init__(self)
+
+#     def attributeNames(self):
+#         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
+
+#     def parameterAttributes(self):
+#         return ["b1","W1", "b2", "W2"]
+
+#     def updateMinibatchInputFields(self):
+#         return ["input","target"]
+
+#     def updateEndOutputAttributes(self):
+#         return ["regularization_term"]
+
+#     def lossAttribute(self):
+#         return "minibatch_criterion"
+
+#     def defaultOutputFields(self, input_fields):
+#         output_fields = ["output", "output_class",]
+#         if "target" in input_fields:
+#             output_fields += ["class_error", "nll"]
+#         return output_fields
+
+#     def allocate(self,minibatch):
+#         minibatch_n_inputs = minibatch["input"].shape[1]
+#         if not self._n_inputs:
+#             self._n_inputs = minibatch_n_inputs
+#             self.b1 = numpy.zeros((1,self._n_hidden))
+#             self.b2 = numpy.zeros((1,self._n_outputs))
+#             self.forget()
+#         elif self._n_inputs!=minibatch_n_inputs:
+#             # if the input changes dimension on the fly, we resize and forget everything
+#             self.forget()
+
+#     def forget(self):
+#         if self._n_inputs:
+#             r = self._init_range/math.sqrt(self._n_inputs)
+#             self.W1 = numpy.random.uniform(low=-r,high=r,
+#                                            size=(self._n_hidden,self._n_inputs))
+#             r = self._init_range/math.sqrt(self._n_hidden)
+#             self.W2 = numpy.random.uniform(low=-r,high=r,
+#                                            size=(self._n_outputs,self._n_hidden))
+#             self.b1[:]=0
+#             self.b2[:]=0
+#             self._n_epochs=0
+
+#     def isLastEpoch(self):
+#         self._n_epochs +=1
+#         return self._n_epochs>=self._max_n_epochs
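The comment in binomial_cross_entropy_formula leans on a softplus identity; as a worked check (my own derivation, not part of the changeset), write p = sigmoid(a) and softplus(a) = \log(1 + e^{a}). Then

    \log p = -\mathrm{softplus}(-a), \qquad \log(1 - p) = -\mathrm{softplus}(a),

so

    q \log p + (1 - q)\log(1 - p) = q\,(\mathrm{softplus}(a) - \mathrm{softplus}(-a)) - \mathrm{softplus}(a) = q\,a - \mathrm{softplus}(a),

which is the quantity the comment names. The code sums q*a - softplus(-a) rather than q*a - softplus(a), so the "hope it's all correct above" caveat in the source seems warranted.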
--- a/sandbox/rbm/model.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/rbm/model.py	Mon Jul 14 13:48:41 2008 -0400
@@ -59,7 +59,7 @@
         random.seed(random_seed)
 
-        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=False, random_seed=self.random_seed)
+        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed)
         self.prev_dw = 0
         self.prev_db = 0
         self.prev_dc = 0
@@ -89,7 +89,7 @@
         """
         minibatch = len(instances)
         v0 = pylearn.sparse_instance.to_vector(instances, self.input_dimension)
-        print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
+        print "old XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch
         q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
         h0 = sample(q0)
         p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
@@ -110,7 +110,7 @@
         self.last_db = db
         self.last_dc = dc
 
-        print "new XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
+        print "new XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch
 
#        print
#        print "v[0]:", v0
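This hunk also normalizes the reported reconstruction cross-entropy by the minibatch size, so the number stays comparable across minibatch sizes. A small NumPy sketch of that reporting convention (illustrative names, not the RBM class itself):

    import numpy as np

    def xent_per_instance(v, v_reconstructed, eps=1e-12):
        # total binary cross-entropy of the minibatch, divided by its size
        xent = -(v * np.log(v_reconstructed + eps)
                 + (1 - v) * np.log(1 - v_reconstructed + eps))
        return np.sum(xent) / len(v)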
--- a/sandbox/simple_autoassociator/README.txt	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/README.txt	Mon Jul 14 13:48:41 2008 -0400
@@ -1,2 +1,5 @@
-This is broken. It can't even learn the simple two training instances in
-main.py
+This seems to work.
+
+@todo:
+   * Add momentum.
+   * Add learning rate decay schedule.
--- a/sandbox/simple_autoassociator/globals.py	Mon Jul 14 13:48:36 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-"""
-Global variables.
-"""
-
-#INPUT_DIMENSION = 1000
-#INPUT_DIMENSION = 100
-INPUT_DIMENSION = 4
-HIDDEN_DIMENSION = 10
-#HIDDEN_DIMENSION = 4
-LEARNING_RATE = 0.1
-LR = LEARNING_RATE
-SEED = 666
--- a/sandbox/simple_autoassociator/graph.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/graph.py	Mon Jul 14 13:48:41 2008 -0400
@@ -6,7 +6,7 @@
 from pylearn.nnet_ops import sigmoid, binary_crossentropy
 from theano import tensor as t
 from theano.tensor import dot
-x = t.dvector()
+x = t.dmatrix()
 w1 = t.dmatrix()
 b1 = t.dvector()
 w2 = t.dmatrix()
@@ -17,10 +17,10 @@
 loss_unsummed = binary_crossentropy(y, x)
 loss = t.sum(loss_unsummed)
 
-(gw1, gb1, gw2, gb2, gy, gh) = t.grad(loss, [w1, b1, w2, b2, y, h])
+(gw1, gb1, gw2, gb2) = t.grad(loss, [w1, b1, w2, b2])
 
 import theano.compile
 
 inputs = [x, w1, b1, w2, b2]
-outputs = [y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy, gh]
+outputs = [y, h, loss, gw1, gb1, gw2, gb2]
 trainfn = theano.compile.function(inputs, outputs)
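With x promoted from a vector to a matrix, the graph now computes one reconstruction per minibatch row. For intuition, the computation it encodes can be sketched in NumPy (names mirror graph.py, but this is my own illustration, not the compiled Theano function):

    import numpy as np

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    def forward(x, w1, b1, w2, b2):
        # x: (minibatch, n_inputs); returns hidden code and reconstruction
        h = sigmoid(np.dot(x, w1) + b1)
        y = sigmoid(np.dot(h, w2) + b2)
        return h, y

    def binary_xent(y, x, eps=1e-12):
        # summed binary cross-entropy between reconstruction y and input x
        return -np.sum(x * np.log(y + eps) + (1 - x) * np.log(1 - y + eps))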
--- a/sandbox/simple_autoassociator/main.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/main.py	Mon Jul 14 13:48:41 2008 -0400
@@ -7,9 +7,6 @@
        y  = sigmoid(dot(h, w2) + b2)
 
     Binary xent loss.
-
-    LIMITATIONS:
-    - Only does pure stochastic gradient (batchsize = 1).
 """
@@ -24,11 +21,11 @@
 ##nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
 
 import model
-model = model.Model()
+model = model.Model(input_dimension=10, hidden_dimension=4)
 
 for i in xrange(100000):
-    # Select an instance
-    instance = nonzero_instances[i % len(nonzero_instances)]
+#    # Select an instance
+#    instance = nonzero_instances[i % len(nonzero_instances)]
 
-    # SGD update over instance
-    model.update(instance)
+    # Update over instance
+    model.update(nonzero_instances)
--- a/sandbox/simple_autoassociator/model.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/model.py	Mon Jul 14 13:48:41 2008 -0400
@@ -6,53 +6,66 @@
 from graph import trainfn
 import parameters
 
-import globals
-from globals import LR
-
 import numpy
 import random
-random.seed(globals.SEED)
+
+import pylearn.sparse_instance
 
 class Model:
-    def __init__(self):
-        self.parameters = parameters.Parameters(randomly_initialize=True)
+    """
+    @todo: Add momentum.
+    @todo: Add learning rate decay schedule.
+    """
+    def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, weight_decay = 0.0002, random_seed = 666):
+        self.input_dimension = input_dimension
+        self.hidden_dimension = hidden_dimension
+        self.learning_rate = learning_rate
+        self.weight_decay = weight_decay
+        self.random_seed = random_seed
 
-    def update(self, instance):
+        random.seed(random_seed)
+
+        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed)
+
+    def deterministic_reconstruction(self, x):
+        (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
+        return y
+
+    def update(self, instances):
         """
         Update the L{Model} using one training instance.
-        @param instance: A dict from feature index to (non-zero) value.
+        @param instances: A list of dict from feature index to (non-zero) value.
         @todo: Should assert that nonzero_indices and zero_indices
         are correct (i.e. are truly nonzero/zero).
+        @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing?
+        @todo: Decay the biases too?
         """
-        x = numpy.zeros(globals.INPUT_DIMENSION)
-        for idx in instance.keys():
-            x[idx] = instance[idx]
+        minibatch = len(instances)
+        x = pylearn.sparse_instance.to_vector(instances, self.input_dimension)
 
-        (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy, gh) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
-        print
-        print "instance:", instance
-        print "x:", x
-        print "OLD y:", y
-        print "OLD loss (unsummed):", loss_unsummed
-        print "gy:", gy
-        print "gh:", gh
+        (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
+#        print
+#        print "instance:", instance
+#        print "x:", x
+#        print "OLD y:", y
         print "OLD total loss:", loss
-        print "gw1:", gw1
-        print "gb1:", gb1
-        print "gw2:", gw2
-        print "gb2:", gb2
+#        print "gw1:", gw1
+#        print "gb1:", gb1
+#        print "gw2:", gw2
+#        print "gb2:", gb2
+
+        self.parameters.w1 *= (1 - self.weight_decay)
+        self.parameters.w2 *= (1 - self.weight_decay)
 
         # SGD update
-        self.parameters.w1 -= LR * gw1
-        self.parameters.b1 -= LR * gb1
-        self.parameters.w2 -= LR * gw2
-        self.parameters.b2 -= LR * gb2
+        self.parameters.w1 -= self.learning_rate * gw1 / minibatch
+        self.parameters.b1 -= self.learning_rate * gb1 / minibatch
+        self.parameters.w2 -= self.learning_rate * gw2 / minibatch
+        self.parameters.b2 -= self.learning_rate * gb2 / minibatch
 
-        # Recompute the loss, to make sure it's descreasing
-        (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy, gh) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
-        print "NEW y:", y
-        print "NEW loss (unsummed):", loss_unsummed
-        print "gy:", gy
-        print "NEW total loss:", loss
-        print "h:", h
-        print self.parameters
+#        # Recompute the loss, to make sure it's descreasing
+#        (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
+##        print "NEW y:", y
+#        print "NEW total loss:", loss
+##        print "h:", h
+##        print self.parameters
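The new update() consumes a list of sparse instances (dicts mapping feature index to value), applies weight decay to the weight matrices, then takes a gradient step averaged over the minibatch. A NumPy sketch of that flow, with a hand-rolled stand-in for pylearn.sparse_instance.to_vector (illustrative only, not the library call):

    import numpy as np

    def to_dense(instances, input_dimension):
        # stand-in for pylearn.sparse_instance.to_vector: one row per instance
        x = np.zeros((len(instances), input_dimension))
        for row, instance in enumerate(instances):
            for idx, value in instance.items():
                x[row, idx] = value
        return x

    def sgd_step(params, grads, learning_rate, weight_decay, minibatch):
        # decay the weights, then take a gradient step scaled by minibatch size
        for name in ('w1', 'w2'):
            params[name] *= (1 - weight_decay)
        for name, g in grads.items():
            params[name] -= learning_rate * g / minibatch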
--- a/sandbox/simple_autoassociator/parameters.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/parameters.py	Mon Jul 14 13:48:41 2008 -0400
@@ -3,25 +3,24 @@
 """
 
 import numpy
-import globals
 
 class Parameters:
     """
     Parameters used by the L{Model}.
     """
-    def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED):
+    def __init__(self, input_dimension, hidden_dimension, randomly_initialize, random_seed):
         """
         Initialize L{Model} parameters.
        @param randomly_initialize: If True, then randomly initialize
        according to the given seed. If False, then just use zeroes.
        """
        if randomly_initialize:
-            numpy.random.seed(seed)
+            numpy.random.seed(random_seed)
            self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension
            self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension
            self.b1 = numpy.zeros(hidden_dimension)
-            #self.b2 = numpy.zeros(input_dimension)
-            self.b2 = numpy.array([10, 0, 0, -10])
+            self.b2 = numpy.zeros(input_dimension)
+            #self.b2 = numpy.array([10, 0, 0, -10])
        else:
            self.w1 = numpy.zeros((input_dimension, hidden_dimension))
            self.w2 = numpy.zeros((hidden_dimension, input_dimension))