pylearn changeset 376:c9a89be5cb0a
Redesigning linear_regression
author   | Yoshua Bengio <bengioy@iro.umontreal.ca>
date     | Mon, 07 Jul 2008 10:08:35 -0400
parents  | 12ce29abf27d (current diff) 90a29489b5c8 (diff)
children | 67c339260875
files    | linear_regression.py misc.py
diffstat | 10 files changed, 682 insertions(+), 174 deletions(-)
--- a/_test_dataset.py Mon Jun 16 17:47:36 2008 -0400 +++ b/_test_dataset.py Mon Jul 07 10:08:35 2008 -0400 @@ -2,7 +2,7 @@ from dataset import * from math import * import numpy, unittest, sys -from misc import * +#from misc import * from lookup_list import LookupList def have_raised(to_eval, **var): @@ -134,12 +134,13 @@ # - for minibatch in dataset.minibatches([field1, field2, ...],minibatch_size=N): i=0 mi=0 - m=ds.minibatches(['x','z'], minibatch_size=3) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + size=3 + m=ds.minibatches(['x','z'], minibatch_size=size) + assert hasattr(m,'__iter__') for minibatch in m: - assert isinstance(minibatch,DataSetFields) + assert isinstance(minibatch,LookupList) assert len(minibatch)==2 - test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi) + test_minibatch_size(minibatch,size,len(ds),2,mi) if type(ds)==ArrayDataSet: assert (minibatch[0][:,::2]==minibatch[1]).all() else: @@ -147,92 +148,103 @@ (minibatch[0][j][::2]==minibatch[1][j]).all() mi+=1 i+=len(minibatch[0]) - assert i==len(ds) - assert mi==4 - del minibatch,i,m,mi + assert i==(len(ds)/size)*size + assert mi==(len(ds)/size) + del minibatch,i,m,mi,size i=0 mi=0 - m=ds.minibatches(['x','y'], minibatch_size=3) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + size=3 + m=ds.minibatches(['x','y'], minibatch_size=size) + assert hasattr(m,'__iter__') for minibatch in m: + assert isinstance(minibatch,LookupList) assert len(minibatch)==2 - test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi) + test_minibatch_size(minibatch,size,len(ds),2,mi) mi+=1 for id in range(len(minibatch[0])): assert (numpy.append(minibatch[0][id],minibatch[1][id])==array[i]).all() i+=1 - assert i==len(ds) - assert mi==4 - del minibatch,i,id,m,mi + assert i==(len(ds)/size)*size + assert mi==(len(ds)/size) + del minibatch,i,id,m,mi,size # - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N): i=0 mi=0 - m=ds.minibatches(['x','z'], minibatch_size=3) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + size=3 + m=ds.minibatches(['x','z'], minibatch_size=size) + assert hasattr(m,'__iter__') for x,z in m: - test_minibatch_field_size(x,m.minibatch_size,len(ds),mi) - test_minibatch_field_size(z,m.minibatch_size,len(ds),mi) + test_minibatch_field_size(x,size,len(ds),mi) + test_minibatch_field_size(z,size,len(ds),mi) for id in range(len(x)): assert (x[id][::2]==z[id]).all() i+=1 mi+=1 - assert i==len(ds) - assert mi==4 - del x,z,i,m,mi + assert i==(len(ds)/size)*size + assert mi==(len(ds)/size) + del x,z,i,m,mi,size + i=0 mi=0 + size=3 m=ds.minibatches(['x','y'], minibatch_size=3) + assert hasattr(m,'__iter__') for x,y in m: - test_minibatch_field_size(x,m.minibatch_size,len(ds),mi) - test_minibatch_field_size(y,m.minibatch_size,len(ds),mi) + assert len(x)==size + assert len(y)==size + test_minibatch_field_size(x,size,len(ds),mi) + test_minibatch_field_size(y,size,len(ds),mi) mi+=1 for id in range(len(x)): assert (numpy.append(x[id],y[id])==array[i]).all() i+=1 - assert i==len(ds) - assert mi==4 - del x,y,i,id,m,mi + assert i==(len(ds)/size)*size + assert mi==(len(ds)/size) + del x,y,i,id,m,mi,size #not in doc i=0 - m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + size=3 + m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=size,offset=4) + assert hasattr(m,'__iter__') for x,y in m: - assert len(x)==m.minibatch_size - assert len(y)==m.minibatch_size - for id in 
range(m.minibatch_size): + assert len(x)==size + assert len(y)==size + for id in range(size): assert (numpy.append(x[id],y[id])==array[i+4]).all() i+=1 - assert i==m.n_batches*m.minibatch_size - del x,y,i,id,m + assert i==size + del x,y,i,id,m,size i=0 - m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + size=3 + m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=size,offset=4) + assert hasattr(m,'__iter__') for x,y in m: - assert len(x)==m.minibatch_size - assert len(y)==m.minibatch_size - for id in range(m.minibatch_size): + assert len(x)==size + assert len(y)==size + for id in range(size): assert (numpy.append(x[id],y[id])==array[i+4]).all() i+=1 - assert i==m.n_batches*m.minibatch_size - del x,y,i,id,m + assert i==2*size + del x,y,i,id,m,size i=0 - m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + size=3 + m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=size,offset=4) + assert hasattr(m,'__iter__') for x,y in m: - assert len(x)==m.minibatch_size - assert len(y)==m.minibatch_size - for id in range(m.minibatch_size): + assert len(x)==size + assert len(y)==size + for id in range(size): assert (numpy.append(x[id],y[id])==array[(i+4)%array.shape[0]]).all() i+=1 - assert i==m.n_batches*m.minibatch_size - del x,y,i,id + assert i==2*size # should not wrap + del x,y,i,id,size - assert not have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array)+1,offset=0) + assert have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array)+1,offset=0) assert not have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array),offset=0) def test_ds_iterator(array,iterator1,iterator2,iterator3): @@ -262,14 +274,17 @@ def test_getitem(array,ds): def test_ds(orig,ds,index): i=0 - assert len(ds)==len(index) - for x,z,y in ds('x','z','y'): - assert (orig[index[i]]['x']==array[index[i]][:3]).all() - assert (orig[index[i]]['x']==x).all() - assert orig[index[i]]['y']==array[index[i]][3] - assert (orig[index[i]]['y']==y).all() # why does it crash sometimes? - assert (orig[index[i]]['z']==array[index[i]][0:3:2]).all() - assert (orig[index[i]]['z']==z).all() + assert isinstance(ds,LookupList) + assert len(ds)==3 + assert len(ds[0])==len(index) +# for x,z,y in ds('x','z','y'): + for idx in index: + assert (orig[idx]['x']==array[idx][:3]).all() + assert (orig[idx]['x']==ds['x'][i]).all() + assert orig[idx]['y']==array[idx][3] + assert (orig[idx]['y']==ds['y'][i]).all() # why does it crash sometimes? + assert (orig[idx]['z']==array[idx][0:3:2]).all() + assert (orig[idx]['z']==ds['z'][i]).all() i+=1 del i ds[0] @@ -282,19 +297,22 @@ for x in ds: pass -#ds[:n] returns a dataset with the n first examples. +#ds[:n] returns a LookupList with the n first examples. ds2=ds[:3] - assert isinstance(ds2,LookupList) test_ds(ds,ds2,index=[0,1,2]) del ds2 -#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. - ds2=ds.subset[1:7:2] - assert isinstance(ds2,DataSet) +#ds[i:j] returns a LookupList with examples i,i+1,...,j-1. + ds2=ds[1:3] + test_ds(ds,ds2,index=[1,2]) + del ds2 + +#ds[i1:i2:s] returns a LookupList with the examples i1,i1+s,...i2-s. + ds2=ds[1:7:2] test_ds(ds,ds2,[1,3,5]) del ds2 -#ds[i] +#ds[i] returns the (i+1)-th example of the dataset. 
ds2=ds[5] assert isinstance(ds2,Example) assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined @@ -302,8 +320,8 @@ del ds2 #ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. - ds2=ds.subset[[4,7,2,8]] - assert isinstance(ds2,DataSet) + ds2=ds[[4,7,2,8]] +# assert isinstance(ds2,DataSet) test_ds(ds,ds2,[4,7,2,8]) del ds2 @@ -326,6 +344,71 @@ # del i,example #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? +def test_subset(array,ds): + def test_ds(orig,ds,index): + i=0 + assert isinstance(ds2,DataSet) + assert len(ds)==len(index) + for x,z,y in ds('x','z','y'): + assert (orig[index[i]]['x']==array[index[i]][:3]).all() + assert (orig[index[i]]['x']==x).all() + assert orig[index[i]]['y']==array[index[i]][3] + assert orig[index[i]]['y']==y + assert (orig[index[i]]['z']==array[index[i]][0:3:2]).all() + assert (orig[index[i]]['z']==z).all() + i+=1 + del i + ds[0] + if len(ds)>2: + ds[:1] + ds[1:1] + ds[1:1:1] + if len(ds)>5: + ds[[1,2,3]] + for x in ds: + pass + +#ds[:n] returns a dataset with the n first examples. + ds2=ds.subset[:3] + test_ds(ds,ds2,index=[0,1,2]) +# del ds2 + +#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. + ds2=ds.subset[1:7:2] + test_ds(ds,ds2,[1,3,5]) +# del ds2 + +# #ds[i] +# ds2=ds.subset[5] +# assert isinstance(ds2,Example) +# assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined +# assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) +# del ds2 + +#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. + ds2=ds.subset[[4,7,2,8]] + test_ds(ds,ds2,[4,7,2,8]) +# del ds2 + +#ds.<property># returns the value of a property associated with + #the name <property>. The following properties should be supported: + # - 'description': a textual description or name for the ds + # - 'fieldtypes': a list of types (one per field) + +#* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? + #assert hstack([ds('x','y'),ds('z')])==ds + #hstack([ds('z','y'),ds('x')])==ds + assert have_raised2(hstack,[ds('x'),ds('x')]) + assert have_raised2(hstack,[ds('y','x'),ds('x')]) + assert not have_raised2(hstack,[ds('x'),ds('y')]) + +# i=0 +# for example in hstack([ds('x'),ds('y'),ds('z')]): +# example==ds[i] +# i+=1 +# del i,example +#* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? + def test_fields_fct(ds): #@todo, fill correctly assert len(ds.fields())==3 @@ -455,6 +538,7 @@ test_iterate_over_examples(array, ds) test_overrides(ds) test_getitem(array, ds) + test_subset(array, ds) test_ds_iterator(array,ds('x','y'),ds('y','z'),ds('x','y','z')) test_fields_fct(ds) @@ -515,6 +599,15 @@ del a, ds + def test_RenamedFieldsDataSet(self): + a = numpy.random.rand(10,4) + ds = ArrayDataSet(a,Example(['x1','y1','z1','w1'],[slice(3),3,[0,2],0])) + ds = RenamedFieldsDataSet(ds,['x1','y1','z1'],['x','y','z']) + + test_all(a,ds) + + del a, ds + def test_MinibatchDataSet(self): raise NotImplementedError() def test_HStackedDataSet(self): @@ -570,14 +663,17 @@ res = dsc[:] if __name__=='__main__': - if len(sys.argv)==2: - if sys.argv[1]=="--debug": + tests = [] + debug=False + if len(sys.argv)==1: + unittest.main() + else: + assert sys.argv[1]=="--debug" + for arg in sys.argv[2:]: + tests.append(arg) + if tests: + unittest.TestSuite(map(T_DataSet, tests)).debug() + else: module = __import__("_test_dataset") tests = unittest.TestLoader().loadTestsFromModule(module) tests.debug() - print "bad argument: only --debug is accepted" - elif len(sys.argv)==1: - unittest.main() - else: - print "bad argument: only --debug is accepted" -
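The rewritten assertions in the _test_dataset.py diff above drop the old wrap-around expectation (i==len(ds), mi==4) in favour of complete batches only. A minimal sketch of that contract, assuming plain numpy rather than the pylearn DataSet API:

# Only complete minibatches are yielded: a dataset of length n with
# minibatch_size=size produces n // size batches, and the trailing
# n % size examples are dropped instead of wrapping around.
import numpy

def iter_complete_minibatches(data, size):
    """Yield successive complete minibatches of rows of `data`, dropping the remainder."""
    for b in range(len(data) // size):
        yield data[b * size:(b + 1) * size]

data = numpy.arange(40).reshape(10, 4)
batches = list(iter_complete_minibatches(data, 3))
assert len(batches) == 10 // 3                          # mi == len(ds)/size
assert sum(len(b) for b in batches) == (10 // 3) * 3    # i == (len(ds)/size)*size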
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test_onehotop.py Mon Jul 07 10:08:35 2008 -0400 @@ -0,0 +1,21 @@ +from onehotop import one_hot + +import unittest +from theano import compile +from theano import gradient + +from theano.tensor import as_tensor + +import random +import numpy.random + +class T_OneHot(unittest.TestCase): + def test0(self): + x = as_tensor([3, 2, 1]) + y = as_tensor(5) + o = one_hot(x, y) + y = compile.eval_outputs([o]) + self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]]))) + +if __name__ == '__main__': + unittest.main()
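test0 above checks one_hot(x, y) against a hard-coded matrix. As a reading aid, here is a numpy-only reference for that value (an illustration, not the Theano Op added in onehotop.py below):

import numpy

def one_hot_reference(x, y):
    """Row c of the result is a length-y vector with a single 1 at column x[c]."""
    out = numpy.zeros((len(x), y), dtype="float64")
    for c, idx in enumerate(x):
        assert idx < y          # same precondition the Op's perform() asserts
        out[c, idx] = 1
    return out

assert (one_hot_reference([3, 2, 1], 5) ==
        numpy.asarray([[0, 0, 0, 1, 0],
                       [0, 0, 1, 0, 0],
                       [0, 1, 0, 0, 0]])).all()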
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test_random_transformation.py Mon Jul 07 10:08:35 2008 -0400 @@ -0,0 +1,84 @@ +from random_transformation import row_random_transformation + +import unittest +from theano import compile +from theano import gradient + +from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result +from theano.sparse import _mtypes, _mtype_to_str +from theano.sparse import as_sparse + +from theano.tensor import as_tensor +from theano.scalar import as_scalar + +import random +import numpy.random + +class T_RowRandomTransformation(unittest.TestCase): + def setUp(self): + random.seed(44) + numpy.random.seed(44) + + def test_basic(self): + rows = 4 + cols = 20 + fakeseed = 0 + length = 3 + md = numpy.random.rand(rows, cols) + for mtype in _mtypes: + m = as_sparse(mtype(md)) + o = row_random_transformation(m, length, initial_seed=fakeseed) + y = compile.eval_outputs([o]) + expected = "[[ 0.88239119 1.03244463 -1.29297503]\n [ 0.02644961 1.50119695 -0.025081 ]\n [-0.60741013 1.25424625 0.30119422]\n [-1.08659967 -0.35531544 -1.38915467]]" + self.failUnless(str(y) == expected) + + def test_length(self): + """ Test that if length is increased, we obtain the same results + (except longer). """ + + for i in range(10): + mtype = random.choice(_mtypes) + rows = random.randint(1, 20) + cols = random.randint(1, 20) + fakeseed = random.randint(0, 100) + length = random.randint(1, 10) + extralength = random.randint(1, 10) + + m = as_sparse(mtype(numpy.random.rand(rows, cols))) + o1 = row_random_transformation(m, length, initial_seed=fakeseed) + o2 = row_random_transformation(m, length + extralength, initial_seed=fakeseed) + + y1 = compile.eval_outputs([o1]) + y2 = compile.eval_outputs([o2]) + + self.failUnless((y1 == y2[:,:length]).all()) + + def test_permute(self): + """ Test that if the order of the rows is permuted, we obtain the same results. """ + for i in range(10): + mtype = random.choice(_mtypes) + rows = random.randint(2, 20) + cols = random.randint(1, 20) + fakeseed = random.randint(0, 100) + length = random.randint(1, 10) + + permute = numpy.random.permutation(rows) + + + m1 = numpy.random.rand(rows, cols) + m2 = m1[permute] + for r in range(rows): + self.failUnless((m2[r] == m1[permute[r]]).all()) + s1 = as_sparse(mtype(m1)) + s2 = as_sparse(mtype(m2)) + o1 = row_random_transformation(s1, length, initial_seed=fakeseed) + o2 = row_random_transformation(s2, length, initial_seed=fakeseed) + y1 = compile.eval_outputs([o1]) + y2 = compile.eval_outputs([o2]) + + self.failUnless(y1.shape == y2.shape) + for r in range(rows): + self.failUnless((y2[r] == y1[permute[r]]).all()) + +if __name__ == '__main__': + unittest.main()
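The two property tests above (test_length and test_permute) come down to the transformation being a fixed linear map. A small sketch of those invariants with a dense input and an explicit random matrix (an assumption for illustration; the Op itself works on sparse inputs and never materializes the matrix):

import numpy

rng = numpy.random.RandomState(44)
m = rng.rand(4, 20)                       # (examples, dimensions)
R = rng.uniform(-1, 1, size=(20, 8))      # stands in for the deterministic random matrix

y_long = numpy.dot(m, R)
y_short = numpy.dot(m, R[:, :5])
assert numpy.allclose(y_short, y_long[:, :5])    # test_length: a longer output only appends columns

perm = rng.permutation(4)
assert numpy.allclose(numpy.dot(m[perm], R), numpy.dot(m, R)[perm])    # test_permute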
--- a/dataset.py Mon Jun 16 17:47:36 2008 -0400 +++ b/dataset.py Mon Jul 07 10:08:35 2008 -0400 @@ -1,6 +1,6 @@ from lookup_list import LookupList as Example -from misc import unique_elements_list_intersection +from common.misc import unique_elements_list_intersection from string import join from sys import maxint import numpy, copy @@ -381,7 +381,8 @@ any other object that supports integer indexing and slicing. @ATTENTION: now minibatches returns minibatches_nowrap, which is supposed to return complete - batches only, raise StopIteration + batches only, raise StopIteration. + @ATTENTION: minibatches returns a LookupList, we can't iterate over examples on it. """ #return DataSet.MinibatchWrapAroundIterator(self,fieldnames,minibatch_size,n_batches,offset)\ @@ -435,6 +436,16 @@ Return a dataset that sees only the fields whose name are specified. """ assert self.hasFields(*fieldnames) + #return self.fields(*fieldnames).examples() + fieldnames_list = list(fieldnames) + return FieldsSubsetDataSet(self,fieldnames_list) + + def cached_fields_subset(self,*fieldnames) : + """ + Behaviour is supposed to be the same as __call__(*fieldnames), but the dataset returned is cached. + @see : dataset.__call__ + """ + assert self.hasFields(*fieldnames) return self.fields(*fieldnames).examples() def fields(self,*fieldnames): @@ -692,6 +703,7 @@ assert len(src_fieldnames)==len(new_fieldnames) self.valuesHStack = src.valuesHStack self.valuesVStack = src.valuesVStack + self.lookup_fields = Example(new_fieldnames,src_fieldnames) def __len__(self): return len(self.src) @@ -719,9 +731,18 @@ def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): assert self.hasFields(*fieldnames) - return self.src.minibatches_nowrap(fieldnames,minibatch_size,n_batches,offset) + cursor = Example(fieldnames,[0]*len(fieldnames)) + for batch in self.src.minibatches_nowrap([self.lookup_fields[f] for f in fieldnames],minibatch_size,n_batches,offset): + cursor._values=batch._values + yield cursor + def __getitem__(self,i): - return FieldsSubsetDataSet(self.src[i],self.new_fieldnames) +# return FieldsSubsetDataSet(self.src[i],self.new_fieldnames) + complete_example = self.src[i] + return Example(self.new_fieldnames, + [complete_example[field] + for field in self.src_fieldnames]) + class DataSetFields(Example): @@ -859,7 +880,9 @@ return self def next(self): upper = self.next_example+minibatch_size - assert upper<=self.ds.length + if upper > len(self.ds) : + raise StopIteration() + assert upper<=len(self.ds) # instead of self.ds.length #minibatch = Example(self.ds._fields.keys(), # [field[self.next_example:upper] # for field in self.ds._fields]) @@ -1314,7 +1337,10 @@ # into memory at once, which may be too much # the work could possibly be done by minibatches # that are as large as possible but no more than what memory allows. 
- fields_values = source_dataset.minibatches(minibatch_size=len(source_dataset)).__iter__().next() + # + # field_values is supposed to be an DataSetFields, that inherits from LookupList + #fields_values = source_dataset.minibatches(minibatch_size=len(source_dataset)).__iter__().next() + fields_values = DataSetFields(source_dataset,None) assert all([len(self)==len(field_values) for field_values in fields_values]) for example in fields_values.examples(): self.cached_examples.append(copy.copy(example)) @@ -1333,16 +1359,25 @@ self.dataset=dataset self.current=offset self.all_fields = self.dataset.fieldNames()==fieldnames + self.n_batches = n_batches + self.batch_counter = 0 def __iter__(self): return self def next(self): + self.batch_counter += 1 + if self.n_batches and self.batch_counter > self.n_batches : + raise StopIteration() upper = self.current+minibatch_size + if upper > len(self.dataset.source_dataset): + raise StopIteration() cache_len = len(self.dataset.cached_examples) if upper>cache_len: # whole minibatch is not already in cache # cache everything from current length to upper - for example in self.dataset.source_dataset[cache_len:upper]: + #for example in self.dataset.source_dataset[cache_len:upper]: + for example in self.dataset.source_dataset.subset[cache_len:upper]: self.dataset.cached_examples.append(example) all_fields_minibatch = Example(self.dataset.fieldNames(), zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size])) + self.current+=minibatch_size if self.all_fields: return all_fields_minibatch
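The last hunk above changes the cached minibatch iterator so that it honours n_batches and raises StopIteration instead of wrapping past the end of the source dataset. A hedged sketch of that behaviour on a plain sequence (NoWrapMinibatchIterator is a name invented here for illustration, not pylearn code):

class NoWrapMinibatchIterator(object):
    """Yield complete slices of `data`; stop after n_batches or at the end, never wrap."""
    def __init__(self, data, minibatch_size, n_batches=None, offset=0):
        self.data = data
        self.minibatch_size = minibatch_size
        self.n_batches = n_batches
        self.current = offset
        self.batch_counter = 0

    def __iter__(self):
        return self

    def next(self):                      # Python 2 iterator protocol, as in the diff
        self.batch_counter += 1
        if self.n_batches and self.batch_counter > self.n_batches:
            raise StopIteration()
        upper = self.current + self.minibatch_size
        if upper > len(self.data):
            raise StopIteration()
        batch = self.data[self.current:upper]
        self.current = upper
        return batch

assert len(list(NoWrapMinibatchIterator(range(10), 3))) == 3   # 10 // 3 complete batches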
--- a/linear_regression.py Mon Jun 16 17:47:36 2008 -0400 +++ b/linear_regression.py Mon Jul 07 10:08:35 2008 -0400 @@ -4,11 +4,12 @@ the use of theano. """ -from learner import * -from theano import tensor as t +from pylearn import OfflineLearningAlgorithm +from theano import tensor as T from theano.scalar import as_scalar +from common.autoname import AutoName -class LinearRegression(MinibatchUpdatesTLearner): +class LinearRegression(OfflineLearningAlgorithm): """ Implement linear regression, with or without L2 regularization (the former is called Ridge Regression and the latter Ordinary Least Squares). @@ -40,96 +41,122 @@ plus L2_regularizer on the diagonal except at (0,0), and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. - The fields and attributes expected and produced by use and update are the following: + The dataset fields expected and produced by the learning algorithm and the trained model + are the following: - - Input and output fields (example-wise quantities): + - Input and output dataset fields (example-wise quantities): - - 'input' (always expected by use and update as an input_dataset field) - - 'target' (optionally expected by use and update as an input_dataset field) - - 'output' (optionally produced by use as an output dataset field) - - 'squared_error' (optionally produced by use as an output dataset field, needs 'target') = example-wise squared error + - 'input' (always expected as an input_dataset field) + - 'target' (always expected by the learning algorithm, optional for learned model) + - 'output' (always produced by learned model) + - 'squared_error' (optionally produced by learned model if 'target' is provided) + = example-wise squared error + """ + def __init__(self, L2_regularizer=0): + self.predictor = LinearPredictor(None,None + self.L2_regularizer=L2_regularizer + self._XtX = T.matrix('XtX') + self._XtY = T.matrix('XtY') + self._extended_input = T.prepend_one_to_each_row(self._input) - - optional attributes (optionally expected as input_dataset attributes) - (warning, this may be dangerous, the 'use' method will use those provided in the - input_dataset rather than those learned during 'update'; currently no support - for providing these to update): - - - 'L2_regularizer' - - 'b' - - 'W' - - 'parameters' = [b, W] - - 'regularization_term' - - 'XtX' - - 'XtY' - - """ +class LinearPredictorEquations(AutoName): + inputs = T.matrix() # minibatchsize x n_inputs + targets = T.matrix() # minibatchsize x n_outputs + theta = T.matrix() # (n_inputs+1) x n_outputs + b = theta[0] + Wt = theta[1:,:] + outputs = T.dot(inputs,Wt) + b # minibatchsize x n_outputs + squared_errors = T.sum(T.sqr(targets-outputs),axis=1) - def attributeNames(self): - return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"] + __compiled = False + @classmethod + def compile(cls,linker='c|py'): + if cls.__compiled: + return + def fn(input_vars,output_vars): + return staticmethod(theano.function(input_vars,output_vars, linker=linker)) - def useInputAttributes(self): - return ["b","W"] - - def useOutputAttributes(self): - return [] + cls.compute_outputs = fn([inputs,theta],[outputs]) + cls.compute_errors = fn([outputs,targets],[squared_errors]) - def updateInputAttributes(self): - return ["L2_regularizer","XtX","XtY"] + cls.__compiled = True - def updateMinibatchInputFields(self): - return ["input","target"] - - def updateMinibatchInputAttributes(self): - return ["XtX","XtY"] + def __init__(self) + self.compile() + +class 
LinearRegressionEquations(LinearPredictorEquations): + P = LinearPredictorEquations + XtX = T.matrix() # (n_inputs+1) x (n_inputs+1) + XtY = T.matrix() # (n_inputs+1) x n_outputs + extended_input = T.prepend_scalar_to_each_row(1,P.inputs) + new_XtX = add_inplace(XtX,T.dot(extended_input.T,extended_input)) + new_XtY = add_inplace(XtY,T.dot(extended_input.T,P.targets)) - def updateMinibatchOutputAttributes(self): - return ["new_XtX","new_XtY"] - - def updateEndInputAttributes(self): - return ["theta","XtX","XtY"] - - def updateEndOutputAttributes(self): - return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? +class LinearPredictor(object): + """ + A linear predictor has parameters theta (a bias vector and a weight matrix) + it can use to make a linear prediction (according to the LinearPredictorEquations). + It can compute its output (bias + weight * input) and a squared error (||output - target||^2). + """ + def __init__(self, theta): + self.theta=theta + self.n_inputs=theta.shape[0]-1 + self.n_outputs=theta.shape[1] + self.predict_equations = LinearPredictorEquations() - def parameterAttributes(self): - return ["b","W"] + def compute_outputs(self,inputs): + return self.predict_equations.compute_outputs(inputs,self.theta) + def compute_errors(self,inputs,targets): + return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets) + def compute_outputs_and_errors(self,inputs,targets): + outputs = self.compute_outputs(inputs) + return [outputs,self.predict_equations.compute_errors(outputs,targets)] - def defaultOutputFields(self, input_fields): - output_fields = ["output"] - if "target" in input_fields: - output_fields.append("squared_error") - return output_fields + def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): + assert dataset.hasFields(["input"]) + if output_fieldnames is None: + if dataset.hasFields(["target"]): + output_fieldnames = ["output","squared_error"] + else: + output_fieldnames = ["output"] + output_fieldnames.sort() + if output_fieldnames == ["squared_error"]: + f = self.compute_errors + elif output_fieldnames == ["output"]: + f = self.compute_outputs + elif output_fieldnames == ["output","squared_error"]: + f = self.compute_outputs_and_errors + else: + raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) - def __init__(self): - self._input = t.matrix('input') # n_examples x n_inputs - self._target = t.matrix('target') # n_examples x n_outputs - self._L2_regularizer = as_scalar(0.,'L2_regularizer') - self._theta = t.matrix('theta') - self._W = self._theta[:,1:] - self._b = self._theta[:,0] - self._XtX = t.matrix('XtX') - self._XtY = t.matrix('XtY') - self._extended_input = t.prepend_one_to_each_row(self._input) - self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix - self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector - self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) - self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) - self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) - self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) + ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) + if cached_output_dataset: + return CachedDataSet(ds) + else: + return ds + - MinibatchUpdatesTLearner.__init__(self) - - def allocate(self,minibatch): - 
minibatch_n_inputs = minibatch["input"].shape[1] - minibatch_n_outputs = minibatch["target"].shape[1] + self._XtX = T.matrix('XtX') + self._XtY = T.matrix('XtY') + self._extended_input = T.prepend_one_to_each_row(self._input) + self._output = T.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix + self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector + self._regularizer = self._L2_regularizer * T.dot(self._W,self._W) + self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input)) + self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target)) + self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY) + + def allocate(self,dataset): + dataset_n_inputs = dataset["input"].shape[1] + dataset_n_outputs = dataset["target"].shape[1] if not self._n_inputs: - self._n_inputs = minibatch_n_inputs - self._n_outputs = minibatch_n_outputs + self._n_inputs = dataset_n_inputs + self._n_outputs = dataset_n_outputs self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) self.forget() - elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: + elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs: # if the input or target changes dimension on the fly, we resize and forget everything self.forget() @@ -141,3 +168,6 @@ self.XtY.data[:,:]=0 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer + def __call__(self,dataset): + +
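The LinearRegression docstring above specifies the closed-form solution: theta solves XtX * theta = XtY, where XtX is the (n_inputs+1) x (n_inputs+1) matrix accumulated from the 1-extended inputs with L2_regularizer added on the diagonal except at (0,0), and XtY is (n_inputs+1) x n_outputs. Since the Theano graph is still being assembled in this changeset, here is a numpy-only sketch of that computation (an illustration, not the pylearn implementation):

import numpy

def ridge_fit(X, Y, L2_regularizer=0.0):
    """Solve XtX * theta = XtY for the 1-extended X, leaving the bias unregularized."""
    n_examples, n_inputs = X.shape
    Xe = numpy.hstack([numpy.ones((n_examples, 1)), X])   # prepend 1 to each row
    XtX = numpy.dot(Xe.T, Xe)
    XtY = numpy.dot(Xe.T, Y)
    idx = numpy.arange(1, n_inputs + 1)
    XtX[idx, idx] += L2_regularizer                       # diagonal except (0,0)
    return numpy.linalg.solve(XtX, XtY)                   # theta: (n_inputs+1) x n_outputs

def ridge_predict(theta, X):
    b, Wt = theta[0], theta[1:]                           # as in LinearPredictorEquations
    return numpy.dot(X, Wt) + b                           # minibatchsize x n_outputs

With L2_regularizer=0 this is ordinary least squares; with a positive value it is ridge regression, matching the docstring.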
--- a/misc.py Mon Jun 16 17:47:36 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - -def unique_elements_list_intersection(list1,list2): - """ - Return the unique elements that are in both list1 and list2 - (repeated elements in listi will not be duplicated in the result). - This should run in O(n1+n2) where n1=|list1|, n2=|list2|. - """ - return list(set.intersection(set(list1),set(list2))) -import time -#http://www.daniweb.com/code/snippet368.html -def print_timing(func): - def wrapper(*arg): - t1 = time.time() - res = func(*arg) - t2 = time.time() - print '%s took %0.3f ms' % (func.func_name, (t2-t1)*1000.0) - return res - return wrapper
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/onehotop.py Mon Jul 07 10:08:35 2008 -0400 @@ -0,0 +1,58 @@ +""" +One hot Op +""" + +#from theano import tensor +from theano.tensor import as_tensor, Tensor +from theano.gof import op +from theano.gof.graph import Apply + +import numpy + +class OneHot(op.Op): + """ + Construct a one-hot vector, x out of y. + + @todo: Document inputs and outputs + @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! + @todo: Use 'bool' as output dtype, not 'int64' ? + @todo: Allow this to operate on column vectors (Tensor) + @todo: Describe better. + """ + + def make_node(self, x, y): + """ + @type x: Vector L{Tensor} of integers + @param x: The entries of the one-hot vector to be one. + @type y: Integer scalar L{Tensor} + @param y: The length (#columns) of the one-hot vectors. + @return: A L{Tensor} of one-hot vectors + + @precondition: x < y for all entries of x + @todo: Check that x and y are int types + """ + x = as_tensor(x) + y = as_tensor(y) + #assert x.dtype[0:3] == "int" + #assert y.dtype[0:3] == "int" + inputs = [x, y] + ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] + #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] + #outputs = [Tensor("int64", broadcastable=[False, False])] + outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] + node = Apply(op = self, inputs = inputs, outputs = outputs) + return node + + def perform(self, node, (x, y), (out, )): + assert x.dtype == "int64" or x.dtype == "int32" + assert x.ndim == 1 + assert y.dtype == "int64" or x.dtype == "int32" + assert y.ndim == 0 + out[0] = numpy.zeros((x.shape[0], y), dtype="float64") + for c in range(x.shape[0]): + assert x[c] < y + out[0][c, x[c]] = 1 + + def grad(self, (x, y), (out_gradient, )): + return None, None +one_hot = OneHot()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/onehotop.py.scalar Mon Jul 07 10:08:35 2008 -0400 @@ -0,0 +1,64 @@ +""" +One hot Op +""" + +#from theano import tensor +from theano.tensor import as_tensor, Tensor +#from theano import scalar +from theano.scalar import as_scalar +from theano.gof import op +from theano.gof.graph import Apply + +import numpy + +class OneHot(op.Op): + """ + Construct a one-hot vector, x out of y. + + @todo: Document inputs and outputs + @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! + @todo: Use 'bool' as output dtype, not 'int64' ? + @todo: Allow this to operate on column vectors (Tensor) + @todo: Describe better. + @todo: What type is y? + @todo: What about operating on L{Scalar}s? + """ + + def make_node(self, x, y): + """ + @type x: Vector L{Tensor} of integers + @param x: The entries of the one-hot vector to be one. + @type y: Integer L{Scalar} + @param y: The length (#columns) of the one-hot vectors. + @return: A L{Tensor} of one-hot vectors + + @precondition: x < y for all entries of x + @todo: Check that x and y are int types + """ + #x = tensor.as_tensor(x) + #y = scalar.as_scalar(y) + x = as_tensor(x) + y = as_scalar(y) + #assert x.dtype[0:3] == "int" + #assert y.dtype[0:3] == "int" + inputs = [x, y] + ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] + #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] + #outputs = [Tensor("int64", broadcastable=[False, False])] + outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] + node = Apply(op = self, inputs = inputs, outputs = outputs) + return node + + def perform(self, node, (x, y), (out, )): + assert x.dtype == "int64" + assert type(y) == numpy.int64 + assert x.ndim == 1 + #out = numpy.zeros((x.shape[0], y), dtype="int64") + out[0] = numpy.zeros((x.shape[0], y), dtype="float64") + for c in range(x.shape[0]): + assert x[c] < y + out[0][c, x[c]] = 1 + + def grad(self, (x, y), (out_gradient, )): + return None, None +one_hot = OneHot()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/random_transformation.py Mon Jul 07 10:08:35 2008 -0400 @@ -0,0 +1,132 @@ +""" +New L{Op}s that aren't in core theano +""" + +from theano import sparse +from theano import tensor +from theano import scalar +from theano.gof import op + +from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result + +import scipy.sparse + +import numpy + +class RowRandomTransformation(op.Op): + """ + Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we + multiply it by a deterministic random matrix of shape (dimensions, + length) to obtain random transformation output of shape (exmpls, + length). + + Each element of the deterministic random matrix is selected uniformly + from [-1, +1). + @todo: Use another random distribution? + + @note: This function should be written such that if length is + increased, we obtain the same results (except longer). Similarly, + the rows should be able to be permuted and get the same result in + the same fashion. + + @todo: This may be slow? + @todo: Rewrite for dense matrices too? + @todo: Is there any way to verify the convention that each row is + an example? Should I rename the variables in the code to make the + semantics more explicit? + @todo: AUTOTEST: Autotest that dense and spare versions of this are identical. + @todo: Rename? Is Row the correct name? Maybe column-wise? + + @type x: L{scipy.sparse.spmatrix} + @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions) + @type length: int + @param length: The number of transformations of C{x} to be performed. + @param initial_seed: Initial seed for the RNG. + @rtype: L{numpy.ndarray} + @return: Array with C{length} random transformations, with shape (exmpls, length) + """ + + import random + """ + RNG used for random transformations. + Does not share state with rest of program. + @todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic. + """ + _trng = random.Random() + + def __init__(self, x, length, initial_seed=0, **kwargs): + """ + @todo: Which broadcastable values should I use? + """ + assert 0 # Needs to be updated to Olivier's new Op creation approach + op.Op.__init__(self, **kwargs) + x = sparse.as_sparse(x) + self.initial_seed = initial_seed + self.length = length + self.inputs = [x] + self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])] +# self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])] + + def _random_matrix_value(self, row, col, rows): + """ + From a deterministic random matrix, find one element. + @param row: The row of the element to be read. + @param col: The column of the element to be read. + @param row: The number of rows in the matrix. + @type row: int + @type col: int + @type rows: int + @note: This function is designed such that if we extend + the number of columns in the random matrix, the values of + the earlier entries is unchanged. 
+ @todo: Make this static + """ + # Choose the random entry at (l, c) + rngidx = col * rows + row + # Set the random number state for this random entry + # Note: This may be slow + self._trng.seed(rngidx + self.initial_seed) + + # Determine the value for this entry + val = self._trng.uniform(-1, +1) +# print "Exmpl #%d, dimension #%d => Random projection #%d has idx %d (+ seed %d) and value %f" % (r, c, j, rngidx, self.initial_seed, val) + return val + + def impl(self, xorig): + assert _is_sparse(xorig) + assert len(xorig.shape) == 2 + # Since conversions to and from the COO format are quite fast, you + # can use this approach to efficiently implement lots computations + # on sparse matrices. + x = xorig.tocoo() + (rows, cols) = x.shape + tot = rows * cols + out = numpy.zeros((rows, self.length)) +# print "l = %d" % self.length +# print "x.getnnz() = %d" % x.getnnz() + all = zip(x.col, x.row, x.data) + all.sort() # TODO: Maybe this is very slow? + lastc = None + lastl = None + lastval = None + for l in range(self.length): + for (c, r, data) in all: + assert c < cols + assert r < rows + if not c == lastc or not l == lastl: + lastc = c + lastl = l + lastval = self._random_matrix_value(c, l, cols) + val = lastval +# val = self._random_matrix_value(c, l, cols) +# val = self._trng.uniform(-1, +1) +# val = 1.0 + out[r][l] += val * data + return out + def __copy__(self): + return self.__class__(self.inputs[0], self.length, self.initial_seed) + def clone_with_new_inputs(self, *new_inputs): + return self.__class__(new_inputs[0], self.length, self.initial_seed) + def desc(self, *new_inputs): + return (self.__class__, self.length, self.initial_seed) +row_random_transformation = RowRandomTransformation()
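RowRandomTransformation never stores its random matrix: _random_matrix_value reseeds a private RNG per entry, which is why the length test in _test_random_transformation.py holds by construction. A standalone sketch of that convention (the helper names here are illustrative, not part of the Op):

import random

def random_matrix_value(row, col, rows, initial_seed=0):
    """Entry (row, col) of the virtual random matrix, reproduced by reseeding."""
    trng = random.Random()
    trng.seed(col * rows + row + initial_seed)    # same indexing as _random_matrix_value
    return trng.uniform(-1, +1)

def random_matrix(rows, length, initial_seed=0):
    return [[random_matrix_value(r, c, rows, initial_seed) for c in range(length)]
            for r in range(rows)]

short = random_matrix(4, 3)
longer = random_matrix(4, 5)
assert all(short[r] == longer[r][:3] for r in range(4))   # earlier columns never change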
--- a/statscollector.py Mon Jun 16 17:47:36 2008 -0400 +++ b/statscollector.py Mon Jul 07 10:08:35 2008 -0400 @@ -1,7 +1,13 @@ # Here is how I see stats collectors: -# def my_stats((residue,nll),(regularizer)): +def my_stats(graph): + graph.mse=examplewise_mean(square_norm(graph.residue)) + graph.training_loss=graph.regularizer+examplewise_sum(graph.nll) + return [graph.mse,graph.training_loss] + + +# def my_stats(residue,nll,regularizer): # mse=examplewise_mean(square_norm(residue)) # training_loss=regularizer+examplewise_sum(nll) # set_names(locals())
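The new my_stats(graph) sketch attaches an example-wise mean squared error and a regularized training loss to the graph. The helpers it names are not defined in this changeset; the following numpy guess at what they would compute is hypothetical and for illustration only:

import numpy

def square_norm(residue):                 # example-wise ||residue||^2
    return numpy.sum(numpy.square(residue), axis=1)

def examplewise_mean(values):
    return numpy.mean(values)

def examplewise_sum(values):
    return numpy.sum(values)

residue = numpy.array([[1.0, 2.0], [0.0, 3.0]])
nll = numpy.array([0.5, 0.25])
regularizer = 0.1
mse = examplewise_mean(square_norm(residue))          # graph.mse
training_loss = regularizer + examplewise_sum(nll)    # graph.training_loss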