Mercurial > pylearn
changeset 430:c096e2820131 (merge)

author    Yoshua Bengio <bengioy@iro.umontreal.ca>
date      Tue, 29 Jul 2008 09:36:09 -0400
parents   2bde0bed1919 (diff)  52b4908d8971 (current diff)
children  0f8c81b0776d
diffstat  6 files changed, 230 insertions(+), 218 deletions(-)
--- a/gradient_learner.py Fri Jul 25 16:59:57 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-
-from learner import *
-from tensor import *
-import gradient
-from compile import Function
-
-class GradientLearner(Learner):
-    """
-    Base class for gradient-based optimization of a training criterion
-    that can consist in two parts, an additive part over examples, and
-    an example-independent part (usually called the regularizer).
-    The user provides a Theano formula that maps the fields of a minibatch (each being a tensor with the
-    same number of rows = minibatch size) and parameters to output fields (for the use function), one of which
-    must be a cost that is the training criterion to be minimized. Subclasses implement
-    a training strategy that uses the Theano formula to compute gradients and
-    to compute outputs in the update method.
-    The inputs, parameters, and outputs are lists of Theano tensors,
-    while the example_wise_cost and regularization_term are Theano tensors.
-    The user can specify a regularization coefficient that multiplies the regularization term.
-    The training algorithm looks for parameters that minimize
-       regularization_coefficient * regularization_term(parameters) +
-       sum_{inputs in training_set} example_wise_cost(inputs,parameters)
-    i.e. the regularization_term should not depend on the inputs, only on the parameters.
-    The learned function can map a subset of inputs to a subset of outputs (as long as the inputs subset
-    includes all the inputs required in the Theano expression for the selected outputs).
-    It is assumed that all the inputs are provided in the training set (as dataset fields
-    with the corresponding name), but not necessarily when using the learned function.
-    """
-    def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
-                 regularization_coefficient = astensor(1.0)):
-        self.inputs = inputs
-        self.outputs = outputs
-        self.parameters = parameters
-        self.example_wise_cost = example_wise_cost
-        self.regularization_term = regularization_term
-        self.regularization_coefficient = regularization_coefficient
-        self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters)
-        self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters)
-        if example_wise_cost not in outputs:
-            outputs.append(example_wise_cost)
-        if regularization_term not in outputs:
-            outputs.append(regularization_term)
-        self.example_wise_gradient_fn = Function(inputs + parameters,
-                                                 [self.parameters_example_wise_gradient + self.parameters_regularization_gradient])
-        self.use_functions = {frozenset([input.name for input in inputs]+[output.name for output in outputs])
-                              : Function(inputs, outputs)}
-
-    def use(self,input_dataset,output_fields=None,copy_inputs=True):
-        # obtain the function that maps the desired inputs to desired outputs
-        input_fields = input_dataset.fieldNames()
-        # map names of input fields to Theano tensors in self.inputs
-        input_variables = ???
-        if output_fields is None: output_fields = [output.name for output in outputs]
-        # handle special case of inputs that are directly copied into outputs
-        # map names of output fields to Theano tensors in self.outputs
-        output_variables = ???
-        use_function_key = input_fields+output_fields
-        if not self.use_functions.has_key(use_function_key):
-            self.use_function[use_function_key]=Function(input_variables,output_variables)
-        use_function = self.use_functions[use_function_key]
-        # return a dataset that computes the outputs
-        return input_dataset.apply_function(use_function,input_fields,output_fields,copy_inputs,compute_now=True)
-
-
-class StochasticGradientDescent(object):
-    def update_parameters(self):
-
-class StochasticGradientLearner(GradientLearner,StochasticGradientDescent):
-    def __init__(self,inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
-                 regularization_coefficient = astensor(1.0),)
-    def update()
--- a/kernel_regression.py Fri Jul 25 16:59:57 2008 -0400
+++ b/kernel_regression.py Tue Jul 29 09:36:09 2008 -0400
@@ -82,9 +82,11 @@
      - 'squared_error' (optionally produced by learned model if 'target' is provided)
        = example-wise squared error
     """
 
-    def __init__(self, kernel=None, L2_regularizer=0, gamma=1):
+    def __init__(self, kernel=None, L2_regularizer=0, gamma=1, use_bias=False):
+        # THE VERSION WITH BIAS DOES NOT SEEM RIGHT
         self.kernel = kernel
         self.L2_regularizer=L2_regularizer
+        self.use_bias=use_bias
         self.gamma = gamma # until we fix things, the kernel type is fixed, Gaussian
         self.equations = KernelRegressionEquations()
@@ -93,19 +95,22 @@
         first_example = trainset[0]
         n_inputs = first_example['input'].size
         n_outputs = first_example['target'].size
-        M = numpy.zeros((n_examples+1,n_examples+1))
-        Y = numpy.zeros((n_examples+1,n_outputs))
+        b1=1 if self.use_bias else 0
+        M = numpy.zeros((n_examples+b1,n_examples+b1))
+        Y = numpy.zeros((n_examples+b1,n_outputs))
         for i in xrange(n_examples):
-            M[i+1,i+1]=self.L2_regularizer
+            M[i+b1,i+b1]=self.L2_regularizer
         data = trainset.fields()
         train_inputs = numpy.array(data['input'])
-        Y[0]=1
-        Y[1:,:] = numpy.array(data['target'])
+        if self.use_bias:
+            Y[0]=1
+        Y[b1:,:] = numpy.array(data['target'])
         train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma)
-        M[1:,1:] += G
-        M[0,1:] = sumG
-        M[1:,0] = 1
-        M[0,0] = M.shape[0]
+        M[b1:,b1:] += G
+        if self.use_bias:
+            M[0,1:] = sumG
+            M[1:,0] = 1
+            M[0,0] = M.shape[0]
         self.M=M
         self.Y=Y
         theta=numpy.linalg.solve(M,Y)
@@ -117,10 +122,11 @@
     inputs = T.matrix() # minibatchsize x n_inputs
     targets = T.matrix() # minibatchsize x n_outputs
     theta = T.matrix() # (n_examples+1) x n_outputs
+    b1 = T.shape(train_inputs_square)[0]<T.shape(theta)[0]
     gamma = T.scalar()
     inv_gamma2 = 1./(gamma*gamma)
-    b = theta[0]
-    alpha = theta[1:,:]
+    b = b1*theta[0]
+    alpha = theta[b1:,:]
     inputs_square = T.sum(inputs*inputs,axis=1)
     Kx = T.exp(-(row_vector(train_inputs_square)-2*T.dot(inputs,train_inputs.T)+col_vector(inputs_square))*inv_gamma2)
     outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs
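
Note on the change above: the system solved in the use_bias=False path is ordinary Gaussian-kernel ridge regression, (G + L2_regularizer*I) alpha = Y, with predictions K(x, train) . alpha; the use_bias=True branch additionally borders M with a bias row and column, which the in-line comment flags as still questionable. A minimal NumPy sketch of the bias-free path follows; the names train_x, train_y, gamma and L2_regularizer are illustrative and not taken from this changeset.

    import numpy

    def gaussian_gram(a, b, gamma):
        # squared distances ||a_i - b_j||^2 from squared norms and a dot product
        d2 = (a*a).sum(1)[:, None] - 2*numpy.dot(a, b.T) + (b*b).sum(1)[None, :]
        return numpy.exp(-d2 / (gamma*gamma))

    def fit(train_x, train_y, gamma=1.0, L2_regularizer=0.1):
        # solve (G + L2_regularizer*I) alpha = Y for the dual weights alpha
        G = gaussian_gram(train_x, train_x, gamma)
        return numpy.linalg.solve(G + L2_regularizer*numpy.eye(len(train_x)), train_y)

    def predict(test_x, train_x, alpha, gamma=1.0):
        # outputs = K(test, train) . alpha, one row per test example
        return numpy.dot(gaussian_gram(test_x, train_x, gamma), alpha)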
--- a/linear_regression.py Fri Jul 25 16:59:57 2008 -0400
+++ b/linear_regression.py Tue Jul 29 09:36:09 2008 -0400
@@ -35,14 +35,6 @@
 
     The predictor parameters are obtained analytically from the training set.
 
-    *** NOT IMPLEMENTED YET ***
-    Training can proceed sequentially (with multiple calls to update with
-    different disjoint subsets of the training sets). After each call to
-    update the predictor is ready to be used (and optimized for the union
-    of all the training sets passed to update since construction or since
-    the last call to forget).
-    ***************************
-
     For each (input[t],output[t]) pair in a minibatch,::
 
        output_t = b + W * input_t
@@ -189,3 +181,17 @@
         return ds
 
 
+#TODO : an online version
+class OnlineLinearRegression(OnlineLearningAlgorithm):
+    """
+    Training can proceed sequentially (with multiple calls to update with
+    different disjoint subsets of the training sets). After each call to
+    update the predictor is ready to be used (and optimized for the union
+    of all the training sets passed to update since construction or since
+    the last call to forget).
+    """
+    pass
+
+
+
+
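
The OnlineLinearRegression class added above is only a stub stating the intended behaviour. One conventional way to obtain it, sketched below purely as an assumption and not as the changeset's implementation, is to accumulate the sufficient statistics X'X and X'Y across calls to update() and re-solve the regularized normal equations, so the predictor is always fit to the union of the minibatches seen so far.

    import numpy

    class OnlineLinearRegressionSketch(object):
        def __init__(self, n_inputs, n_outputs, L2_regularizer=0.0):
            self.L2_regularizer = L2_regularizer
            self.forget(n_inputs, n_outputs)

        def forget(self, n_inputs, n_outputs):
            # reset the accumulated statistics; the bias is handled by an appended 1
            self.XtX = numpy.zeros((n_inputs+1, n_inputs+1))
            self.XtY = numpy.zeros((n_inputs+1, n_outputs))

        def update(self, inputs, targets):
            # inputs: minibatchsize x n_inputs, targets: minibatchsize x n_outputs
            X = numpy.hstack([numpy.ones((len(inputs), 1)), inputs])
            self.XtX += numpy.dot(X.T, X)
            self.XtY += numpy.dot(X.T, targets)
            reg = self.L2_regularizer * numpy.eye(self.XtX.shape[0])
            reg[0, 0] = 0  # do not regularize the bias
            self.theta = numpy.linalg.solve(self.XtX + reg, self.XtY)

        def use(self, inputs):
            # predictions for the union of all data passed to update() so far
            X = numpy.hstack([numpy.ones((len(inputs), 1)), inputs])
            return numpy.dot(X, self.theta)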
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/gradient_learner.py Tue Jul 29 09:36:09 2008 -0400
@@ -0,0 +1,71 @@
+
+from learner import *
+from tensor import *
+import gradient
+from compile import Function
+
+class GradientLearner(Learner):
+    """
+    Base class for gradient-based optimization of a training criterion
+    that can consist in two parts, an additive part over examples, and
+    an example-independent part (usually called the regularizer).
+    The user provides a Theano formula that maps the fields of a minibatch (each being a tensor with the
+    same number of rows = minibatch size) and parameters to output fields (for the use function), one of which
+    must be a cost that is the training criterion to be minimized. Subclasses implement
+    a training strategy that uses the Theano formula to compute gradients and
+    to compute outputs in the update method.
+    The inputs, parameters, and outputs are lists of Theano tensors,
+    while the example_wise_cost and regularization_term are Theano tensors.
+    The user can specify a regularization coefficient that multiplies the regularization term.
+    The training algorithm looks for parameters that minimize
+       regularization_coefficient * regularization_term(parameters) +
+       sum_{inputs in training_set} example_wise_cost(inputs,parameters)
+    i.e. the regularization_term should not depend on the inputs, only on the parameters.
+    The learned function can map a subset of inputs to a subset of outputs (as long as the inputs subset
+    includes all the inputs required in the Theano expression for the selected outputs).
+    It is assumed that all the inputs are provided in the training set (as dataset fields
+    with the corresponding name), but not necessarily when using the learned function.
+    """
+    def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
+                 regularization_coefficient = astensor(1.0)):
+        self.inputs = inputs
+        self.outputs = outputs
+        self.parameters = parameters
+        self.example_wise_cost = example_wise_cost
+        self.regularization_term = regularization_term
+        self.regularization_coefficient = regularization_coefficient
+        self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters)
+        self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters)
+        if example_wise_cost not in outputs:
+            outputs.append(example_wise_cost)
+        if regularization_term not in outputs:
+            outputs.append(regularization_term)
+        self.example_wise_gradient_fn = Function(inputs + parameters,
+                                                 [self.parameters_example_wise_gradient + self.parameters_regularization_gradient])
+        self.use_functions = {frozenset([input.name for input in inputs]+[output.name for output in outputs])
+                              : Function(inputs, outputs)}
+
+    def use(self,input_dataset,output_fields=None,copy_inputs=True):
+        # obtain the function that maps the desired inputs to desired outputs
+        input_fields = input_dataset.fieldNames()
+        # map names of input fields to Theano tensors in self.inputs
+        input_variables = ???
+        if output_fields is None: output_fields = [output.name for output in outputs]
+        # handle special case of inputs that are directly copied into outputs
+        # map names of output fields to Theano tensors in self.outputs
+        output_variables = ???
+        use_function_key = input_fields+output_fields
+        if not self.use_functions.has_key(use_function_key):
+            self.use_function[use_function_key]=Function(input_variables,output_variables)
+        use_function = self.use_functions[use_function_key]
+        # return a dataset that computes the outputs
+        return input_dataset.apply_function(use_function,input_fields,output_fields,copy_inputs,compute_now=True)
+
+
+class StochasticGradientDescent(object):
+    def update_parameters(self):
+
+class StochasticGradientLearner(GradientLearner,StochasticGradientDescent):
+    def __init__(self,inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
+                 regularization_coefficient = astensor(1.0),)
+    def update()
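
The two "???" placeholders in use() are left unresolved in this changeset. Assuming each Theano variable in self.inputs and self.outputs carries a .name equal to the corresponding dataset field name, a plausible helper for the missing name-to-variable mapping could look like the following; the helper name and its use are hypothetical, not part of the file.

    def _variables_by_name(variables, names):
        # map dataset field names back to the Theano variables declared with those names
        by_name = dict((v.name, v) for v in variables)
        missing = [n for n in names if n not in by_name]
        if missing:
            raise ValueError("no Theano variable for fields: %s" % missing)
        return [by_name[n] for n in names]

    # inside use(), under the same assumption:
    #   input_variables  = _variables_by_name(self.inputs,  input_fields)
    #   output_variables = _variables_by_name(self.outputs, output_fields)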
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/statscollector.py Tue Jul 29 09:36:09 2008 -0400
@@ -0,0 +1,127 @@
+
+# Here is how I see stats collectors:
+
+def my_stats(graph):
+    graph.mse=examplewise_mean(square_norm(graph.residue))
+    graph.training_loss=graph.regularizer+examplewise_sum(graph.nll)
+    return [graph.mse,graph.training_loss]
+
+
+# def my_stats(residue,nll,regularizer):
+#     mse=examplewise_mean(square_norm(residue))
+#     training_loss=regularizer+examplewise_sum(nll)
+#     set_names(locals())
+#     return ((residue,nll),(regularizer),(),(mse,training_loss))
+# my_stats_collector = make_stats_collector(my_stats)
+#
+# where make_stats_collector calls my_stats(examplewise_fields, attributes) to
+# construct its update function, and figure out what are the input fields (here "residue"
+# and "nll") and input attributes (here "regularizer") it needs, and the output
+# attributes that it computes (here "mse" and "training_loss"). Remember that
+# fields are examplewise quantities, but attributes are not, in my jargon.
+# In the above example, I am highlighting that some operations done in my_stats
+# are examplewise and some are not. I am hoping that theano Ops can do these
+# kinds of internal side-effect operations (and proper initialization of these hidden
+# variables). I expect that a StatsCollector (returned by make_stats_collector)
+# knows the following methods:
+#    stats_collector.input_fieldnames
+#    stats_collector.input_attribute_names
+#    stats_collector.output_attribute_names
+#    stats_collector.update(mini_dataset)
+#    stats_collector['mse']
+# where mini_dataset has the input_fieldnames() as fields and the input_attribute_names()
+# as attributes, and in the resulting dataset the output_attribute_names() are set to the
+# proper numeric values.
+
+
+
+import theano
+from theano import tensor as t
+from Learner import Learner
+from lookup_list import LookupList
+
+class StatsCollectorModel(AttributesHolder):
+    def __init__(self,stats_collector):
+        self.stats_collector = stats_collector
+        self.outputs = LookupList(stats_collector.output_names,[None for name in stats_collector.output_names])
+        # the statistics get initialized here
+        self.update_function = theano.function(input_attributes+input_fields,output_attributes+output_fields,linker="c|py")
+        for name,value in self.outputs.items():
+            self.__setattribute__(name,value)
+    def update(self,dataset):
+        input_fields = dataset.fields()(self.stats_collector.input_field_names)
+        input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names)
+        self.outputs._values = self.update_function(input_attributes+input_fields)
+        for name,value in self.outputs.items():
+            self.__setattribute__(name,value)
+    def __call__(self):
+        return self.outputs
+    def attributeNames(self):
+        return self.outputs.keys()
+
+class StatsCollector(AttributesHolder):
+
+    def __init__(self,input_attributes, input_fields, outputs):
+        self.input_attributes = input_attributes
+        self.input_fields = input_fields
+        self.outputs = outputs
+        self.input_attribute_names = [v.name for v in input_attributes]
+        self.input_field_names = [v.name for v in input_fields]
+        self.output_names = [v.name for v in output_attributes]
+
+    def __call__(self,dataset=None):
+        model = StatsCollectorModel(self)
+        if dataset:
+            self.update(dataset)
+        return model
+
+if __name__ == '__main__':
+    def my_statscollector():
+        regularizer = t.scalar()
+        nll = t.matrix()
+        class_error = t.matrix()
+        total_loss = regularizer+t.examplewise_sum(nll)
+        avg_nll = t.examplewise_mean(nll)
+        avg_class_error = t.examplewise_mean(class_error)
+        for name,val in locals().items(): val.name = name
+        return StatsCollector([regularizer],[nll,class_error],[total_loss,avg_nll,avg_class_error])
+
+
+
+
+# OLD DESIGN:
+#
+# class StatsCollector(object):
+#    """A StatsCollector object is used to record performance statistics during training
+#    or testing of a learner. It can be configured to measure different things and
+#    accumulate the appropriate statistics. From these statistics it can be interrogated
+#    to obtain performance measures of interest (such as maxima, minima, mean, standard
+#    deviation, standard error, etc.). Optionally, the observations can be weighted
+#    (yielded weighted mean, weighted variance, etc., where applicable). The statistics
+#    that are desired can be specified among a list supported by the StatsCollector
+#    class or subclass. When some statistics are requested, others become automatically
+#    available (e.g., sum or mean)."""
+#
+#    default_statistics = [mean,standard_deviation,min,max]
+#
+#    __init__(self,n_quantities_observed, statistics=default_statistics):
+#        self.n_quantities_observed=n_quantities_observed
+#
+#    clear(self):
+#        raise NotImplementedError
+#
+#    update(self,observations):
+#        """The observations is a numpy vector of length n_quantities_observed. Some
+#        entries can be 'missing' (with a NaN entry) and will not be counted in the
+#        statistics."""
+#        raise NotImplementedError
+#
+#    __getattr__(self, statistic)
+#        """Return a particular statistic, which may be inferred from the collected statistics.
+#        The argument is a string naming that statistic."""
++
+
+
+
+
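
As a reading aid for the interface sketched in the comments of the new file above (input field names, update() over a minibatch, indexing the collector by statistic name), here is a minimal pure-Python stand-in that only accumulates running means; it makes no use of Theano and none of its names come from the changeset.

    class RunningMeanCollector(object):
        def __init__(self, field_names):
            self.input_fieldnames = list(field_names)
            self.output_attribute_names = ["avg_" + f for f in field_names]
            self._sums = dict((f, 0.0) for f in field_names)
            self._count = 0

        def update(self, mini_dataset):
            # mini_dataset: a list of dictionaries, one per example
            for example in mini_dataset:
                for f in self.input_fieldnames:
                    self._sums[f] += example[f]
                self._count += 1

        def __getitem__(self, name):
            # e.g. collector['avg_nll'] once at least one example has been seen
            field = name[len("avg_"):]
            return self._sums[field] / self._count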
--- a/statscollector.py Fri Jul 25 16:59:57 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-
-# Here is how I see stats collectors:
-
-def my_stats(graph):
-    graph.mse=examplewise_mean(square_norm(graph.residue))
-    graph.training_loss=graph.regularizer+examplewise_sum(graph.nll)
-    return [graph.mse,graph.training_loss]
-
-
-# def my_stats(residue,nll,regularizer):
-#     mse=examplewise_mean(square_norm(residue))
-#     training_loss=regularizer+examplewise_sum(nll)
-#     set_names(locals())
-#     return ((residue,nll),(regularizer),(),(mse,training_loss))
-# my_stats_collector = make_stats_collector(my_stats)
-#
-# where make_stats_collector calls my_stats(examplewise_fields, attributes) to
-# construct its update function, and figure out what are the input fields (here "residue"
-# and "nll") and input attributes (here "regularizer") it needs, and the output
-# attributes that it computes (here "mse" and "training_loss"). Remember that
-# fields are examplewise quantities, but attributes are not, in my jargon.
-# In the above example, I am highlighting that some operations done in my_stats
-# are examplewise and some are not. I am hoping that theano Ops can do these
-# kinds of internal side-effect operations (and proper initialization of these hidden
-# variables). I expect that a StatsCollector (returned by make_stats_collector)
-# knows the following methods:
-#    stats_collector.input_fieldnames
-#    stats_collector.input_attribute_names
-#    stats_collector.output_attribute_names
-#    stats_collector.update(mini_dataset)
-#    stats_collector['mse']
-# where mini_dataset has the input_fieldnames() as fields and the input_attribute_names()
-# as attributes, and in the resulting dataset the output_attribute_names() are set to the
-# proper numeric values.
-
-
-
-import theano
-from theano import tensor as t
-from Learner import Learner
-from lookup_list import LookupList
-
-class StatsCollectorModel(AttributesHolder):
-    def __init__(self,stats_collector):
-        self.stats_collector = stats_collector
-        self.outputs = LookupList(stats_collector.output_names,[None for name in stats_collector.output_names])
-        # the statistics get initialized here
-        self.update_function = theano.function(input_attributes+input_fields,output_attributes+output_fields,linker="c|py")
-        for name,value in self.outputs.items():
-            self.__setattribute__(name,value)
-    def update(self,dataset):
-        input_fields = dataset.fields()(self.stats_collector.input_field_names)
-        input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names)
-        self.outputs._values = self.update_function(input_attributes+input_fields)
-        for name,value in self.outputs.items():
-            self.__setattribute__(name,value)
-    def __call__(self):
-        return self.outputs
-    def attributeNames(self):
-        return self.outputs.keys()
-
-class StatsCollector(AttributesHolder):
-
-    def __init__(self,input_attributes, input_fields, outputs):
-        self.input_attributes = input_attributes
-        self.input_fields = input_fields
-        self.outputs = outputs
-        self.input_attribute_names = [v.name for v in input_attributes]
-        self.input_field_names = [v.name for v in input_fields]
-        self.output_names = [v.name for v in output_attributes]
-
-    def __call__(self,dataset=None):
-        model = StatsCollectorModel(self)
-        if dataset:
-            self.update(dataset)
-        return model
-
-if __name__ == '__main__':
-    def my_statscollector():
-        regularizer = t.scalar()
-        nll = t.matrix()
-        class_error = t.matrix()
-        total_loss = regularizer+t.examplewise_sum(nll)
-        avg_nll = t.examplewise_mean(nll)
-        avg_class_error = t.examplewise_mean(class_error)
-        for name,val in locals().items(): val.name = name
-        return StatsCollector([regularizer],[nll,class_error],[total_loss,avg_nll,avg_class_error])
-
-
-
-
-# OLD DESIGN:
-#
-# class StatsCollector(object):
-#    """A StatsCollector object is used to record performance statistics during training
-#    or testing of a learner. It can be configured to measure different things and
-#    accumulate the appropriate statistics. From these statistics it can be interrogated
-#    to obtain performance measures of interest (such as maxima, minima, mean, standard
-#    deviation, standard error, etc.). Optionally, the observations can be weighted
-#    (yielded weighted mean, weighted variance, etc., where applicable). The statistics
-#    that are desired can be specified among a list supported by the StatsCollector
-#    class or subclass. When some statistics are requested, others become automatically
-#    available (e.g., sum or mean)."""
-#
-#    default_statistics = [mean,standard_deviation,min,max]
-#
-#    __init__(self,n_quantities_observed, statistics=default_statistics):
-#        self.n_quantities_observed=n_quantities_observed
-#
-#    clear(self):
-#        raise NotImplementedError
-#
-#    update(self,observations):
-#        """The observations is a numpy vector of length n_quantities_observed. Some
-#        entries can be 'missing' (with a NaN entry) and will not be counted in the
-#        statistics."""
-#        raise NotImplementedError
-#
-#    __getattr__(self, statistic)
-#        """Return a particular statistic, which may be inferred from the collected statistics.
-#        The argument is a string naming that statistic."""
-
-
-
-
-