# HG changeset patch
# User Yoshua Bengio
# Date 1216754425 14400
# Node ID d7611a3811f2be8297d442dbc0825312f9d92470
# Parent e2b46a8f2b7b0f50051b0711ea25d4b1be6c9cb1
Moved incomplete stuff to sandbox

diff -r e2b46a8f2b7b -r d7611a3811f2 gradient_learner.py
--- a/gradient_learner.py	Sat Jul 19 17:57:46 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-
-from learner import *
-from tensor import *
-import gradient
-from compile import Function
-
-class GradientLearner(Learner):
-    """
-    Base class for gradient-based optimization of a training criterion
-    that can consist in two parts, an additive part over examples, and
-    an example-independent part (usually called the regularizer).
-    The user provides a Theano formula that maps the fields of a minibatch (each being a tensor with the
-    same number of rows = minibatch size) and parameters to output fields (for the use function), one of which
-    must be a cost that is the training criterion to be minimized. Subclasses implement
-    a training strategy that uses the Theano formula to compute gradients and
-    to compute outputs in the update method.
-    The inputs, parameters, and outputs are lists of Theano tensors,
-    while the example_wise_cost and regularization_term are Theano tensors.
-    The user can specify a regularization coefficient that multiplies the regularization term.
-    The training algorithm looks for parameters that minimize
-       regularization_coefficient * regularization_term(parameters) +
-       sum_{inputs in training_set} example_wise_cost(inputs,parameters)
-    i.e. the regularization_term should not depend on the inputs, only on the parameters.
-    The learned function can map a subset of inputs to a subset of outputs (as long as the inputs subset
-    includes all the inputs required in the Theano expression for the selected outputs).
-    It is assumed that all the inputs are provided in the training set (as dataset fields
-    with the corresponding name), but not necessarily when using the learned function.
-    """
-    def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
-                 regularization_coefficient = astensor(1.0)):
-        self.inputs = inputs
-        self.outputs = outputs
-        self.parameters = parameters
-        self.example_wise_cost = example_wise_cost
-        self.regularization_term = regularization_term
-        self.regularization_coefficient = regularization_coefficient
-        self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters)
-        self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters)
-        if example_wise_cost not in outputs:
-            outputs.append(example_wise_cost)
-        if regularization_term not in outputs:
-            outputs.append(regularization_term)
-        self.example_wise_gradient_fn = Function(inputs + parameters,
-                                                 [self.parameters_example_wise_gradient + self.parameters_regularization_gradient])
-        self.use_functions = {frozenset([input.name for input in inputs]+[output.name for output in outputs])
-                              : Function(inputs, outputs)}
-
-    def use(self,input_dataset,output_fields=None,copy_inputs=True):
-        # obtain the function that maps the desired inputs to desired outputs
-        input_fields = input_dataset.fieldNames()
-        # map names of input fields to Theano tensors in self.inputs
-        input_variables = ???
-        if output_fields is None: output_fields = [output.name for output in outputs]
-        # handle special case of inputs that are directly copied into outputs
-        # map names of output fields to Theano tensors in self.outputs
-        output_variables = ???
-        use_function_key = input_fields+output_fields
-        if not self.use_functions.has_key(use_function_key):
-            self.use_function[use_function_key]=Function(input_variables,output_variables)
-        use_function = self.use_functions[use_function_key]
-        # return a dataset that computes the outputs
-        return input_dataset.apply_function(use_function,input_fields,output_fields,copy_inputs,compute_now=True)
-
-
-class StochasticGradientDescent(object):
-    def update_parameters(self):
-
-class StochasticGradientLearner(GradientLearner,StochasticGradientDescent):
-    def __init__(self,inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
-                 regularization_coefficient = astensor(1.0),)
-    def update()
diff -r e2b46a8f2b7b -r d7611a3811f2 kernel_regression.py
--- a/kernel_regression.py	Sat Jul 19 17:57:46 2008 -0400
+++ b/kernel_regression.py	Tue Jul 22 15:20:25 2008 -0400
@@ -82,9 +82,11 @@
      - 'squared_error' (optionally produced by learned model if 'target' is provided)
        = example-wise squared error
     """
-    def __init__(self, kernel=None, L2_regularizer=0, gamma=1):
+    def __init__(self, kernel=None, L2_regularizer=0, gamma=1, use_bias=False):
+        # THE VERSION WITH BIAS DOES NOT SEEM RIGHT
         self.kernel = kernel
         self.L2_regularizer=L2_regularizer
+        self.use_bias=use_bias
         self.gamma = gamma # until we fix things, the kernel type is fixed, Gaussian
         self.equations = KernelRegressionEquations()
@@ -93,19 +95,22 @@
         first_example = trainset[0]
         n_inputs = first_example['input'].size
         n_outputs = first_example['target'].size
-        M = numpy.zeros((n_examples+1,n_examples+1))
-        Y = numpy.zeros((n_examples+1,n_outputs))
+        b1=1 if self.use_bias else 0
+        M = numpy.zeros((n_examples+b1,n_examples+b1))
+        Y = numpy.zeros((n_examples+b1,n_outputs))
         for i in xrange(n_examples):
-            M[i+1,i+1]=self.L2_regularizer
+            M[i+b1,i+b1]=self.L2_regularizer
         data = trainset.fields()
         train_inputs = numpy.array(data['input'])
-        Y[0]=1
-        Y[1:,:] = numpy.array(data['target'])
+        if self.use_bias:
+            Y[0]=1
+        Y[b1:,:] = numpy.array(data['target'])
         train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma)
-        M[1:,1:] += G
-        M[0,1:] = sumG
-        M[1:,0] = 1
-        M[0,0] = M.shape[0]
+        M[b1:,b1:] += G
+        if self.use_bias:
+            M[0,1:] = sumG
+            M[1:,0] = 1
+            M[0,0] = M.shape[0]
         self.M=M
         self.Y=Y
         theta=numpy.linalg.solve(M,Y)
@@ -117,10 +122,11 @@
     inputs = T.matrix() # minibatchsize x n_inputs
     targets = T.matrix() # minibatchsize x n_outputs
     theta = T.matrix() # (n_examples+1) x n_outputs
+    b1 = T.shape(train_inputs_square)[0]
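
Editor's note on the removed gradient_learner.py: its docstring describes the training criterion
regularization_coefficient * regularization_term(parameters) + sum over training examples of
example_wise_cost(inputs, parameters), but the StochasticGradientDescent / StochasticGradientLearner
stubs never implement the corresponding update, which is why the file is moved to the sandbox.
As a rough illustration only (plain Python/NumPy, not the pylearn/Theano API being deleted; all
names below are made up for this sketch), one stochastic gradient step on that criterion could
look like:

def sgd_update(parameters, example, example_wise_grad, regularizer_grad,
               learning_rate=0.01, regularization_coefficient=1.0):
    # parameters: list of numpy arrays, updated in place.
    # example_wise_grad(example, parameters) and regularizer_grad(parameters)
    # each return one gradient array per parameter, mirroring
    # parameters_example_wise_gradient and parameters_regularization_gradient above.
    grads = example_wise_grad(example, parameters)
    reg_grads = regularizer_grad(parameters)
    for p, g, rg in zip(parameters, grads, reg_grads):
        # descend the example-wise cost plus the weighted regularizer
        p -= learning_rate * (g + regularization_coefficient * rg)
    return parameters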
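
Editor's note on the kernel_regression.py change: train() builds and solves the linear system
M theta = Y, where in the default use_bias=False path M is the Gaussian Gram matrix of the training
inputs with L2_regularizer added on its diagonal; the use_bias=True path adds an extra bias
row/column, which the in-code comment already flags as questionable. Below is a minimal,
self-contained NumPy sketch of the use_bias=False path only; the helper names are illustrative
(not from the repository), the Gram-matrix computation stands in for
equations.compute_system_matrix, and the predictor is the standard kernel-regression form
consistent with that system.

import numpy

def gaussian_gram(X, gamma):
    # G[i,j] = exp(-gamma * ||x_i - x_j||^2), via the ||x||^2 + ||y||^2 - 2*x.y expansion.
    sq = (X ** 2).sum(axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * numpy.dot(X, X.T)
    return numpy.exp(-gamma * numpy.maximum(d2, 0.0))

def fit_kernel_regression(train_inputs, train_targets, L2_regularizer=0.0, gamma=1.0):
    # use_bias=False path: M = G + L2_regularizer * I, then solve M theta = Y.
    G = gaussian_gram(train_inputs, gamma)
    M = G + L2_regularizer * numpy.eye(G.shape[0])
    return numpy.linalg.solve(M, train_targets)

def predict_kernel_regression(train_inputs, theta, inputs, gamma):
    # Predictions are kernel-weighted combinations of the solved coefficients:
    # outputs = K(inputs, train_inputs) . theta
    sq_train = (train_inputs ** 2).sum(axis=1)
    sq_in = (inputs ** 2).sum(axis=1)
    d2 = sq_in[:, None] + sq_train[None, :] - 2.0 * numpy.dot(inputs, train_inputs.T)
    K = numpy.exp(-gamma * numpy.maximum(d2, 0.0))
    return numpy.dot(K, theta)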