Mercurial > pylearn
view linear_regression.py @ 107:c4916445e025
Comments from Pascal V.
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Tue, 06 May 2008 19:54:43 -0400 |
parents | c4726e19b8ec |
children | 8fa1ef2411a0 |
line wrap: on
line source
from learner import * from theano import tensor as t from compile import Function from theano.scalar import as_scalar # this is one of the simplest examples of a learner, and illustrates # the use of theano class LinearRegression(OneShotTLearner): """ Implement linear regression, with or without L2 regularization (the former is called Ridge Regression and the latter Ordinary Least Squares). The predictor parameters are obtained analytically from the training set. Training can proceed sequentially (with multiple calls to update with different disjoint subsets of the training sets). After each call to update the predictor is ready to be used (and optimized for the union of all the training sets passed to update since construction or since the last call to forget). The L2 regularization coefficient is obtained analytically. For each (input[t],output[t]) pair in a minibatch,:: output_t = b + W * input_t where b and W are obtained by minimizing:: lambda sum_{ij} W_{ij}^2 + sum_t ||output_t - target_t||^2 Let X be the whole training set inputs matrix (one input example per row), with the first column full of 1's, and let Y be the whole training set targets matrix (one example's target vector per row). Let theta = the matrix with b in its first column and W in the others, then each row theta[i,:], taken as a column vector, is the solution of the linear system:: XtX * theta[i,:]' = XtY[:,i] where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X plus lambda on the diagonal except at (0,0), and XtY is a (n_inputs+1)x(n_outputs) matrix containing X'*Y. 
The fields and attributes expected and produced by use and update are the following: - Input and output fields (example-wise quantities): - 'input' (always expected by use and update as an input_dataset field) - 'target' (optionally expected by use and update as an input_dataset field) - 'output' (optionally produced by use as an output dataset field) - 'squared_error' (optionally produced by use as an output dataset field, needs 'target') = example-wise squared error - optional attributes (optionally expected as input_dataset attributes) (warning, this may be dangerous, the 'use' method will use those provided in the input_dataset rather than those learned during 'update'; currently no support for providing these to update): - 'lambda' - 'b' - 'W' - 'regularization_term' """ def attributeNames(self): return ["lambda","b","W","regularization_term"] def __init__(self): self.input = t.matrix('input') # n_examples x n_inputs self.target = t.matrix('target') # n_examples x n_outputs self.lambda = as_scalar(0.,'lambda') self.theta = t.matrix('theta') self.W = self.theta[:,1:] self.b = self.theta[:,0] self.XtX = t.matrix('XtX') self.XtY = t.matrix('XtY') self.regularizer = self.lambda * t.dot(self.W,self.W) self.squared_error = self.loss = self.regularizer + t.sum(self.squared_error) # this only makes sense if the whole training set fits in memory in a minibatch self.loss_function = Function([self.W,self.lambda,self.squared_error],[self.loss]) self.new_XtX = self.XtX + t.dot(self.extended_input.T,self.extended_input) self.new_XtY = self.XtY + t.dot(self.extended_input.T,self.target) self.new_theta = t.solve(self.XtX,self.XtY) def initialize(self): self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) self.XtY.resize((1+self.n_inputs,self.n_outputs)) self.XtX.data[:,:]=0 self.XtY.data[:,:]=0 numpy.diag(self.XtX.data)[1:]=self.lambda.data def updated_variables(self): def minibatch_wise_inputs(self): def minibatch_wise_outputs(self): # self.input is a (n_examples, n_inputs) 
minibatch matrix self.extended_input = t.prepend_one_to_each_row(self.input) self.output = t.dot(self.input,self.W.T) + self.b # (n_examples , n_outputs) matrix self.squared_error = t.sum_within_rows(t.sqr(self.output-self.target)) # (n_examples ) vector def attributeNames(self): return ["lambda","b","W","regularization_term","XtX","XtY"] def defaultOutputFields(self, input_fields): output_fields = ["output"] if "target" in input_fields: output_fields.append("squared_error") return output_fields # poutine generale basee sur ces fonctions def __init__(self,lambda=0.,max_memory_use=500): """ @type lambda: float @param lambda: regularization coefficient """ W=t.matrix('W') # b is a broadcastable row vector (can be replicated into # as many rows as there are examples in the minibach) b=t.row('b') minibatch_input = t.matrix('input') # n_examples x n_inputs minibatch_target = t.matrix('target') # n_examples x n_outputs minibatch_output = t.dot(minibatch_input,W.T) + b # n_examples x n_outputs lambda = as_scalar(lambda) regularizer = self.lambda * t.dot(W,W) example_squared_error = t.sum_within_rows(t.sqr(minibatch_output-minibatch_target)) self.output_function = Function([W,b,minibatch_input],[minibatch_output]) self.squared_error_function = Function([minibatch_output,minibatch_target],[self.example_squared_error]) self.loss_function = Function([W,squared_error],[self.regularizer + t.sum(self.example_squared_error)]) self.W=None self.b=None self.XtX=None self.XtY=None def forget(self): if self.W: self.XtX *= 0 self.XtY *= 0 def use(self,input_dataset,output_fieldnames=None,copy_inputs=True): input_fieldnames = input_dataset.fieldNames() assert "input" in input_fieldnames if not output_fields: output_fields = ["output"] if "target" in input_fieldnames: output_fields += ["squared_error"] else: if "squared_error" in output_fields or "total_loss" in output_fields: assert "target" in input_fieldnames use_functions = [] for output_fieldname in output_fieldnames: if 
output_fieldname=="output": use_functions.append(self.output_function) elif output_fieldname=="squared_error": use_functions.append(lambda self.output_function) n_examples = len(input_dataset) for minibatch in input_dataset.minibatches(minibatch_size=minibatch_size, allow_odd_last_minibatch=True): use_function(