# HG changeset patch # User delallea@opale.iro.umontreal.ca # Date 1212157156 14400 # Node ID ef70a665aaaf416db6a4cc4f27abd202f050a214 # Parent ddb88a8e9fd2e45decf033e50a4b7e78a33254dc# Parent 97f35d58672706c2522c2ce61eae65788681a9e0 Hmm... that was committed by Fred I think, I got lost by Mercurial I think diff -r ddb88a8e9fd2 -r ef70a665aaaf dataset.py --- a/dataset.py Fri May 30 10:14:46 2008 -0400 +++ b/dataset.py Fri May 30 10:19:16 2008 -0400 @@ -987,7 +987,7 @@ for fieldname, fieldcolumns in self.fields_columns.items(): if type(fieldcolumns) is int: assert fieldcolumns>=0 and fieldcolumns=0 and offset=self.dataset.data.shape[0]: raise StopIteration sub_data = self.dataset.data[self.current] - self.minibatch._values = [sub_data[self.dataset.fields_columns[f]] for f in self.minibatch._names] + self.minibatch._values = [sub_data[c] for c in self.columns] + self.current+=self.minibatch_size return self.minibatch diff -r ddb88a8e9fd2 -r ef70a665aaaf denoising_aa.py --- a/denoising_aa.py Fri May 30 10:14:46 2008 -0400 +++ b/denoising_aa.py Fri May 30 10:19:16 2008 -0400 @@ -9,6 +9,7 @@ from nnet_ops import * import math from misc import * +from misc_theano import * from theano.tensor_random import binomial def hiding_corruption_formula(seed,average_fraction_hidden): diff -r ddb88a8e9fd2 -r ef70a665aaaf misc.py --- a/misc.py Fri May 30 10:14:46 2008 -0400 +++ b/misc.py Fri May 30 10:19:16 2008 -0400 @@ -1,24 +1,3 @@ - -import theano - -class Print(theano.Op): - def __init__(self,message=""): - self.message=message - self.view_map={0:[0]} - - def make_node(self,xin): - xout = xin.type.make_result() - return theano.Apply(op = self, inputs = [xin], outputs=[xout]) - - def perform(self,node,inputs,output_storage): - xin, = inputs - xout, = output_storage - xout[0] = xin - print self.message,xin - - def grad(self,input,output_gradients): - return output_gradients - def unique_elements_list_intersection(list1,list2): """ diff -r ddb88a8e9fd2 -r ef70a665aaaf test_dataset.py --- a/test_dataset.py Fri May 30 10:14:46 2008 -0400 +++ b/test_dataset.py Fri May 30 10:19:16 2008 -0400 @@ -194,38 +194,41 @@ m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4) assert isinstance(m,DataSet.MinibatchWrapAroundIterator) for x,y in m: - assert len(x)==3 - assert len(y)==3 - for id in range(3): + assert len(x)==m.minibatch_size + assert len(y)==m.minibatch_size + for id in range(m.minibatch_size): assert (numpy.append(x[id],y[id])==array[i+4]).all() i+=1 - assert i==3 + assert i==m.n_batches*m.minibatch_size del x,y,i,id,m i=0 m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4) assert isinstance(m,DataSet.MinibatchWrapAroundIterator) for x,y in m: - assert len(x)==3 - assert len(y)==3 - for id in range(3): + assert len(x)==m.minibatch_size + assert len(y)==m.minibatch_size + for id in range(m.minibatch_size): assert (numpy.append(x[id],y[id])==array[i+4]).all() i+=1 - assert i==6 + assert i==m.n_batches*m.minibatch_size del x,y,i,id,m i=0 m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4) assert isinstance(m,DataSet.MinibatchWrapAroundIterator) for x,y in m: - assert len(x)==3 - assert len(y)==3 - for id in range(3): + assert len(x)==m.minibatch_size + assert len(y)==m.minibatch_size + for id in range(m.minibatch_size): assert (numpy.append(x[id],y[id])==array[(i+4)%array.shape[0]]).all() i+=1 assert i==m.n_batches*m.minibatch_size del x,y,i,id + #@todo: we can't do minibatch bigger then the size of the dataset??? + assert have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array)+1,offset=0) + assert not have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array),offset=0) def test_ds_iterator(array,iterator1,iterator2,iterator3): l=len(iterator1) @@ -494,10 +497,7 @@ print "test_speed" import time a2 = numpy.random.rand(100000,400) - ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested - ds = ArrayDataSet(a2,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested ds = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)}) - #assert ds==a? should this work? mat = numpy.random.rand(400,100) @print_timing def f_array1(a):