Mercurial > pylearn
changeset 145:933db7ece663
make some functions global so they can be reused to test other datasets
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Mon, 12 May 2008 15:35:18 -0400 |
parents | ceae4de18981 |
children | 625d2b21ee48 a5329e719229 |
files | test_dataset.py |
diffstat | 1 files changed, 304 insertions(+), 303 deletions(-) [+] |
line wrap: on
line diff
--- a/test_dataset.py Mon May 12 15:08:18 2008 -0400 +++ b/test_dataset.py Mon May 12 15:35:18 2008 -0400 @@ -3,7 +3,8 @@ from math import * import numpy -def have_raised(to_eval): +def have_raised(to_eval, **var): + have_thrown = False try: eval(to_eval) @@ -32,6 +33,301 @@ print "var=",var print "take a slice and look at field y",ds[1:6:2]["y"] +def test_iterate_over_examples(array,ds): +#not in doc!!! + i=0 + for example in range(len(ds)): + assert (ds[example]['x']==array[example][:3]).all() + assert ds[example]['y']==array[example][3] + assert (ds[example]['z']==array[example][[0,2]]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for example in dataset: + i=0 + for example in ds: + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for val1,val2,... in dataset: + i=0 + for x,y,z in ds: + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + +# - for example in dataset(field1, field2,field3, ...): + i=0 + for example in ds('x','y','z'): + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + i=0 + for example in ds('y','x'): + assert len(example)==2 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for val1,val2,val3 in dataset(field1, field2,field3): + i=0 + for x,y,z in ds('x','y','z'): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert 
(z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + i=0 + for y,x in ds('y','x',): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,i + + def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): + ##full minibatch or the last minibatch + for idx in range(nb_field): + test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) + del idx + def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): + assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)<minibatch_size) + +# - for minibatch in dataset.minibatches([field1, field2, ...],minibatch_size=N): + i=0 + mi=0 + m=ds.minibatches(['x','z'], minibatch_size=3) + assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + for minibatch in m: + assert len(minibatch)==2 + test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi) + assert (minibatch[0][:,0:3:2]==minibatch[1]).all() + mi+=1 + i+=len(minibatch[0]) + assert i==len(ds) + assert mi==4 + del minibatch,i,m,mi + + i=0 + mi=0 + m=ds.minibatches(['x','y'], minibatch_size=3) + assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + for minibatch in m: + assert len(minibatch)==2 + test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi) + mi+=1 + for id in range(len(minibatch[0])): + assert (numpy.append(minibatch[0][id],minibatch[1][id])==array[i]).all() + i+=1 + assert i==len(ds) + assert mi==4 + del minibatch,i,id,m,mi + +# - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N): + i=0 + mi=0 + m=ds.minibatches(['x','z'], minibatch_size=3) + assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + for x,z in m: + test_minibatch_field_size(x,m.minibatch_size,len(ds),mi) + 
test_minibatch_field_size(z,m.minibatch_size,len(ds),mi) + assert (x[:,0:3:2]==z).all() + i+=len(x) + mi+=1 + assert i==len(ds) + assert mi==4 + del x,z,i,m,mi + i=0 + mi=0 + m=ds.minibatches(['x','y'], minibatch_size=3) + for x,y in m: + test_minibatch_field_size(x,m.minibatch_size,len(ds),mi) + test_minibatch_field_size(y,m.minibatch_size,len(ds),mi) + mi+=1 + for id in range(len(x)): + assert (numpy.append(x[id],y[id])==array[i]).all() + i+=1 + assert i==len(ds) + assert mi==4 + del x,y,i,id,m,mi + +#not in doc + i=0 + m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4) + assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + for x,y in m: + assert len(x)==3 + assert len(y)==3 + for id in range(3): + assert (numpy.append(x[id],y[id])==array[i+4]).all() + i+=1 + assert i==3 + del x,y,i,id,m + + i=0 + m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4) + assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + for x,y in m: + assert len(x)==3 + assert len(y)==3 + for id in range(3): + assert (numpy.append(x[id],y[id])==array[i+4]).all() + i+=1 + assert i==6 + del x,y,i,id,m + + i=0 + m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4) + assert isinstance(m,DataSet.MinibatchWrapAroundIterator) + for x,y in m: + assert len(x)==3 + assert len(y)==3 + for id in range(3): + assert (numpy.append(x[id],y[id])==array[(i+4)%array.shape[0]]).all() + i+=1 + assert i==m.n_batches*m.minibatch_size + del x,y,i,id + + +def test_ds_iterator(array,iterator1,iterator2,iterator3): + l=len(iterator1) + i=0 + for x,y in iterator1: + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==l + i=0 + for y,z in iterator2: + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + i+=1 + assert i==l + i=0 + for x,y,z in iterator3: + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + 
assert i==l + +def test_getitem(array,ds): + def test_ds(orig,ds,index): + i=0 + assert len(ds)==len(index) + for x,z,y in ds('x','z','y'): + assert (orig[index[i]]['x']==array[index[i]][:3]).all() + assert (orig[index[i]]['x']==x).all() + assert orig[index[i]]['y']==array[index[i]][3] + assert orig[index[i]]['y']==y + assert (orig[index[i]]['z']==array[index[i]][0:3:2]).all() + assert (orig[index[i]]['z']==z).all() + i+=1 + del i + ds[0] + if len(ds)>2: + ds[:1] + ds[1:1] + ds[1:1:1] + if len(ds)>5: + ds[[1,2,3]] + for x in ds: + pass + +#ds[:n] returns a dataset with the n first examples. + ds2=ds[:3] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,index=[0,1,2]) + del ds2 + +#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. + ds2=ds[1:7:2] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[1,3,5]) + del ds2 + +#ds[i] + ds2=ds[5] + assert isinstance(ds2,Example) + assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined + assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) + del ds2 + +#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. + ds2=ds[[4,7,2,8]] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[4,7,2,8]) + del ds2 + +#ds[fieldname]# an iterable over the values of the field fieldname across + #the ds (the iterable is obtained by default by calling valuesVStack + #over the values for individual examples). + assert have_raised("ds['h']") # h is not defined... 
+ assert have_raised("ds[['x']]") # bad syntax + assert not have_raised("var['ds']['x']",ds=ds) + isinstance(ds['x'],DataSetFields) + ds2=ds['x'] + assert len(ds['x'])==10 + assert len(ds['y'])==10 + assert len(ds['z'])==10 + i=0 + for example in ds['x']: + assert (example==array[i][:3]).all() + i+=1 + i=0 + for example in ds['y']: + assert (example==array[i][3]).all() + i+=1 + i=0 + for example in ds['z']: + assert (example==array[i,0:3:2]).all() + i+=1 + del ds2,i + +#ds.<property># returns the value of a property associated with + #the name <property>. The following properties should be supported: + # - 'description': a textual description or name for the ds + # - 'fieldtypes': a list of types (one per field) + +#* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? + #hstack([ds('x','y'),ds('z')] + #hstack([ds('z','y'),ds('x')] + #assert have_thrown("hstack([ds('x'),ds('x')]") + #assert not have_thrown("hstack([ds('x'),ds('x')]") + #accept_nonunique_names + #assert have_thrown("hstack([ds('y','x'),ds('x')]") +# i=0 +# for example in hstack([ds('x'),ds('y'),ds('z')]): +# example==ds[i] +# i+=1 +# del i,example +#* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? + + def test_ArrayDataSet(): #don't test stream #tested only with float value @@ -39,313 +335,18 @@ #don't test missing value #don't test with tuple #don't test proterties - def test_iterate_over_examples(array,ds): -#not in doc!!! - i=0 - for example in range(len(ds)): - assert (ds[example]['x']==a[example][:3]).all() - assert ds[example]['y']==a[example][3] - assert (ds[example]['z']==a[example][[0,2]]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for example in dataset: - i=0 - for example in ds: - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,... 
in dataset: - i=0 - for x,y,z in ds: - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - -# - for example in dataset(field1, field2,field3, ...): - i=0 - for example in ds('x','y','z'): - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - i=0 - for example in ds('y','x'): - assert len(example)==2 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,val3 in dataset(field1, field2,field3): - i=0 - for x,y,z in ds('x','y','z'): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - i=0 - for y,x in ds('y','x',): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,i - - def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): - ##full minibatch or the last minibatch - for idx in range(nb_field): - test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) - del idx - def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): - assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)<minibatch_size) - -# - for minibatch in dataset.minibatches([field1, field2, ...],minibatch_size=N): - i=0 - mi=0 - m=ds.minibatches(['x','z'], minibatch_size=3) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) - for minibatch in m: 
- assert len(minibatch)==2 - test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi) - assert (minibatch[0][:,0:3:2]==minibatch[1]).all() - mi+=1 - i+=len(minibatch[0]) - assert i==len(ds) - assert mi==4 - del minibatch,i,m,mi - - i=0 - mi=0 - m=ds.minibatches(['x','y'], minibatch_size=3) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) - for minibatch in m: - assert len(minibatch)==2 - test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi) - mi+=1 - for id in range(len(minibatch[0])): - assert (numpy.append(minibatch[0][id],minibatch[1][id])==a[i]).all() - i+=1 - assert i==len(ds) - assert mi==4 - del minibatch,i,id,m,mi - -# - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N): - i=0 - mi=0 - m=ds.minibatches(['x','z'], minibatch_size=3) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) - for x,z in m: - test_minibatch_field_size(x,m.minibatch_size,len(ds),mi) - test_minibatch_field_size(z,m.minibatch_size,len(ds),mi) - assert (x[:,0:3:2]==z).all() - i+=len(x) - mi+=1 - assert i==len(ds) - assert mi==4 - del x,z,i,m,mi - i=0 - mi=0 - m=ds.minibatches(['x','y'], minibatch_size=3) - for x,y in m: - test_minibatch_field_size(x,m.minibatch_size,len(ds),mi) - test_minibatch_field_size(y,m.minibatch_size,len(ds),mi) - mi+=1 - for id in range(len(x)): - assert (numpy.append(x[id],y[id])==a[i]).all() - i+=1 - assert i==len(ds) - assert mi==4 - del x,y,i,id,m,mi - -#not in doc - i=0 - m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) - for x,y in m: - assert len(x)==3 - assert len(y)==3 - for id in range(3): - assert (numpy.append(x[id],y[id])==a[i+4]).all() - i+=1 - assert i==3 - del x,y,i,id,m - - i=0 - m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) - for x,y in m: - assert len(x)==3 - assert len(y)==3 - for id in range(3): - assert 
(numpy.append(x[id],y[id])==a[i+4]).all() - i+=1 - assert i==6 - del x,y,i,id,m - - i=0 - m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4) - assert isinstance(m,DataSet.MinibatchWrapAroundIterator) - for x,y in m: - assert len(x)==3 - assert len(y)==3 - for id in range(3): - assert (numpy.append(x[id],y[id])==a[(i+4)%a.shape[0]]).all() - i+=1 - assert i==m.n_batches*m.minibatch_size - del x,y,i,id - - - def test_ds_iterator(array,iterator1,iterator2,iterator3): - i=0 - for x,y in iterator1: - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - i=0 - for y,z in iterator2: - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - i+=1 - assert i==len(ds) - i=0 - for x,y,z in iterator3: - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - - def test_getitem(array,ds): - - def test_ds(orig,ds,index): - i=0 - assert len(ds)==len(index) - for x,z,y in ds('x','z','y'): - assert (orig[index[i]]['x']==array[index[i]][:3]).all() - assert (orig[index[i]]['x']==x).all() - assert orig[index[i]]['y']==array[index[i]][3] - assert orig[index[i]]['y']==y - assert (orig[index[i]]['z']==array[index[i]][0:3:2]).all() - assert (orig[index[i]]['z']==z).all() - i+=1 - del i - ds[0] - if len(ds)>2: - ds[:1] - ds[1:1] - ds[1:1:1] - if len(ds)>5: - ds[[1,2,3]] - for x in ds: - pass - - #ds[:n] returns a dataset with the n first examples. - ds2=ds[:3] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,index=[0,1,2]) - del ds2 - - #ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. 
- ds2=ds[1:7:2] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[1,3,5]) - del ds2 - - #ds[i] - ds2=ds[5] - assert isinstance(ds2,Example) - assert have_raised("ds["+str(len(ds))+"]") # index not defined - assert not have_raised("ds["+str(len(ds)-1)+"]") - del ds2 - - #ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. - ds2=ds[[4,7,2,8]] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[4,7,2,8]) - del ds2 - - #ds[fieldname]# an iterable over the values of the field fieldname across - #the ds (the iterable is obtained by default by calling valuesVStack - #over the values for individual examples). - assert have_raised("ds['h']") # h is not defined... - assert have_raised("ds[['x']]") # bad syntax - assert not have_raised("ds['x']") - isinstance(ds['x'],DataSetFields) - ds2=ds['x'] - assert len(ds['x'])==10 - assert len(ds['y'])==10 - assert len(ds['z'])==10 - i=0 - for example in ds['x']: - assert (example==a[i][:3]).all() - i+=1 - i=0 - for example in ds['y']: - assert (example==a[i][3]).all() - i+=1 - i=0 - for example in ds['z']: - assert (example==a[i,0:3:2]).all() - i+=1 - del ds2,i - - #ds.<property># returns the value of a property associated with - #the name <property>. The following properties should be supported: - # - 'description': a textual description or name for the ds - # - 'fieldtypes': a list of types (one per field) - - #* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? - #hstack([ds('x','y'),ds('z')] - #hstack([ds('z','y'),ds('x')] - #assert have_thrown("hstack([ds('x'),ds('x')]") - #assert not have_thrown("hstack([ds('x'),ds('x')]") - #accept_nonunique_names - #assert have_thrown("hstack([ds('y','x'),ds('x')]") -# i=0 -# for example in hstack([ds('x'),ds('y'),ds('z')]): -# example==ds[i] -# i+=1 -# del i,example - #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? 
- - print "test_ArrayDataSet" - a = numpy.random.rand(10,4) - ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested - ds = ArrayDataSet(a,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + a2 = numpy.random.rand(10,4) + ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested + ds = ArrayDataSet(a2,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested assert len(ds)==10 #assert ds==a? should this work? - test_iterate_over_examples(a, ds) - test_getitem(a, ds) + test_iterate_over_examples(a2, ds) + test_getitem(a2, ds) # - for val1,val2,val3 in dataset(field1, field2,field3): - test_ds_iterator(a,ds('x','y'),ds('y','z'),ds('x','y','z')) + test_ds_iterator(a2,ds('x','y'),ds('y','z'),ds('x','y','z')) assert len(ds.fields())==3 @@ -380,7 +381,7 @@ example2 = LookupList(['v','w'], ['a','b']) example3 = LookupList(['x','y','z','u','v','w'], [[1, 2, 3],2,3,0,'a','b']) assert example+example2==example3 - assert have_raised("example+example") + assert have_raised("var['x']+var['x']",x=example) def test_ApplyFunctionDataSet(): print "test_ApplyFunctionDataSet"