changeset 242:ef70a665aaaf
Hmm... that was committed by Fred I think, I got lost by Mercurial I think
| author   | delallea@opale.iro.umontreal.ca |
|----------|---------------------------------|
| date     | Fri, 30 May 2008 10:19:16 -0400 |
| parents  | ddb88a8e9fd2 (current diff) 97f35d586727 (diff) |
| children | c8f19a9eb10f |
| files    | dataset.py |
| diffstat | 4 files changed, 20 insertions(+), 37 deletions(-) |
```diff
--- a/dataset.py	Fri May 30 10:14:46 2008 -0400
+++ b/dataset.py	Fri May 30 10:19:16 2008 -0400
@@ -987,7 +987,7 @@
         for fieldname, fieldcolumns in self.fields_columns.items():
             if type(fieldcolumns) is int:
                 assert fieldcolumns>=0 and fieldcolumns<data_array.shape[1]
-                if 0:
+                if 1:
                     #I changed this because it didn't make sense to me,
                     # and it made it more difficult to write my learner.
                     # If it breaks stuff, let's talk about it.
@@ -1054,6 +1054,8 @@
             assert offset>=0 and offset<len(dataset.data)
             assert offset+minibatch_size<=len(dataset.data)
             self.current=offset
+            self.columns = [self.dataset.fields_columns[f]
+                            for f in self.minibatch._names]
         def __iter__(self):
             return self
         def next(self):
@@ -1062,7 +1064,8 @@
             if self.current>=self.dataset.data.shape[0]:
                 raise StopIteration
             sub_data = self.dataset.data[self.current]
-            self.minibatch._values = [sub_data[self.dataset.fields_columns[f]] for f in self.minibatch._names]
+            self.minibatch._values = [sub_data[c] for c in self.columns]
+
             self.current+=self.minibatch_size
             return self.minibatch
```
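The dataset.py change does two things: it always wraps an integer field column index in a list (the `if 0:` → `if 1:` flip), and it resolves field names to column indices once in the iterator's constructor instead of on every `next()` call. Below is a minimal, self-contained sketch of that second pattern; `RowIterator` and its arguments are invented for illustration and are not the pylearn classes.

```python
import numpy

class RowIterator(object):
    """Toy iterator illustrating the precomputed-columns pattern above."""
    def __init__(self, data, fields_columns, names):
        self.data = data
        self.current = 0
        # Resolve field names to column indices once, up front,
        # instead of on every call to next().
        self.columns = [fields_columns[f] for f in names]

    def __iter__(self):
        return self

    def next(self):  # Python 2 iterator protocol, as in dataset.py
        if self.current >= self.data.shape[0]:
            raise StopIteration
        row = self.data[self.current]
        self.current += 1
        return [row[c] for c in self.columns]

if __name__ == "__main__":
    data = numpy.arange(12).reshape(4, 3)
    it = RowIterator(data, {'x': [0, 1], 'y': [2]}, ['x', 'y'])
    for x, y in it:
        print x, y
```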
```diff
--- a/denoising_aa.py	Fri May 30 10:14:46 2008 -0400
+++ b/denoising_aa.py	Fri May 30 10:19:16 2008 -0400
@@ -9,6 +9,7 @@
 from nnet_ops import *
 import math
 from misc import *
+from misc_theano import *
 from theano.tensor_random import binomial
 
 def hiding_corruption_formula(seed,average_fraction_hidden):
```
```diff
--- a/misc.py	Fri May 30 10:14:46 2008 -0400
+++ b/misc.py	Fri May 30 10:19:16 2008 -0400
@@ -1,24 +1,3 @@
-
-import theano
-
-class Print(theano.Op):
-    def __init__(self,message=""):
-        self.message=message
-        self.view_map={0:[0]}
-
-    def make_node(self,xin):
-        xout = xin.type.make_result()
-        return theano.Apply(op = self, inputs = [xin], outputs=[xout])
-
-    def perform(self,node,inputs,output_storage):
-        xin, = inputs
-        xout, = output_storage
-        xout[0] = xin
-        print self.message,xin
-
-    def grad(self,input,output_gradients):
-        return output_gradients
-
 def unique_elements_list_intersection(list1,list2):
     """
```
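The removed `Print` class is a pass-through Theano op whose only effect is printing its input when evaluated; together with the new `from misc_theano import *` in denoising_aa.py, it looks like the class was moved rather than dropped. A hedged sketch of its behaviour, calling `perform()` directly instead of compiling a Theano function (the `misc_theano` import path is an assumption):

```python
import numpy
from misc_theano import Print  # assumption: this is where the class now lives

op = Print("x =")
out_storage = [None]
# perform() prints the input as a side effect and passes it through unchanged.
op.perform(None, (numpy.ones(3),), (out_storage,))
assert (out_storage[0] == numpy.ones(3)).all()
```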
```diff
--- a/test_dataset.py	Fri May 30 10:14:46 2008 -0400
+++ b/test_dataset.py	Fri May 30 10:19:16 2008 -0400
@@ -194,38 +194,41 @@
     m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4)
     assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
     for x,y in m:
-        assert len(x)==3
-        assert len(y)==3
-        for id in range(3):
+        assert len(x)==m.minibatch_size
+        assert len(y)==m.minibatch_size
+        for id in range(m.minibatch_size):
             assert (numpy.append(x[id],y[id])==array[i+4]).all()
             i+=1
-    assert i==3
+    assert i==m.n_batches*m.minibatch_size
     del x,y,i,id,m
 
     i=0
     m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4)
     assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
     for x,y in m:
-        assert len(x)==3
-        assert len(y)==3
-        for id in range(3):
+        assert len(x)==m.minibatch_size
+        assert len(y)==m.minibatch_size
+        for id in range(m.minibatch_size):
             assert (numpy.append(x[id],y[id])==array[i+4]).all()
             i+=1
-    assert i==6
+    assert i==m.n_batches*m.minibatch_size
     del x,y,i,id,m
 
     i=0
     m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4)
     assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
     for x,y in m:
-        assert len(x)==3
-        assert len(y)==3
-        for id in range(3):
+        assert len(x)==m.minibatch_size
+        assert len(y)==m.minibatch_size
+        for id in range(m.minibatch_size):
             assert (numpy.append(x[id],y[id])==array[(i+4)%array.shape[0]]).all()
             i+=1
     assert i==m.n_batches*m.minibatch_size
     del x,y,i,id
+    #@todo: we can't do minibatch bigger then the size of the dataset???
+    assert have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array)+1,offset=0)
+    assert not have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array),offset=0)
 
 
 def test_ds_iterator(array,iterator1,iterator2,iterator3):
     l=len(iterator1)
@@ -494,10 +497,7 @@
     print "test_speed"
     import time
     a2 = numpy.random.rand(100000,400)
-    ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested
-    ds = ArrayDataSet(a2,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested
     ds = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)})
-    #assert ds==a? should this work?
     mat = numpy.random.rand(400,100)
     @print_timing
     def f_array1(a):
```
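The new assertions at the end of the first hunk rely on a `have_raised2` helper whose definition is not part of this diff. A minimal sketch of the semantics those asserts assume (return True iff the call raises), which may differ from the real helper in the test suite:

```python
def have_raised2(f, *args, **kwargs):
    """Return True iff calling f(*args, **kwargs) raises an exception."""
    try:
        f(*args, **kwargs)
    except Exception:
        return True
    return False

# e.g. int("foo") raises ValueError, int("42") does not
assert have_raised2(int, "foo")
assert not have_raised2(int, "42")
```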