# HG changeset patch # User Yoshua Bengio # Date 1210621834 14400 # Node ID 625d2b21ee489e01ba00fd88e37d104fd3362d4e # Parent 933db7ece663496fde755eeb596fca907a7de76e# Parent 8173e196e2914f99e372846d0db9253d88f53a85 Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn diff -r 8173e196e291 -r 625d2b21ee48 test_dataset.py --- a/test_dataset.py Mon May 12 15:50:28 2008 -0400 +++ b/test_dataset.py Mon May 12 15:50:34 2008 -0400 @@ -3,7 +3,8 @@ from math import * import numpy -def have_raised(to_eval): +def have_raised(to_eval, **var): + have_thrown = False try: eval(to_eval) @@ -32,6 +33,301 @@ print "var=",var print "take a slice and look at field y",ds[1:6:2]["y"] +def test_iterate_over_examples(array,ds): +#not in doc!!! + i=0 + for example in range(len(ds)): + assert (ds[example]['x']==array[example][:3]).all() + assert ds[example]['y']==array[example][3] + assert (ds[example]['z']==array[example][[0,2]]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for example in dataset: + i=0 + for example in ds: + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for val1,val2,... in dataset: + i=0 + for x,y,z in ds: + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + +# - for example in dataset(field1, field2,field3, ...): + i=0 + for example in ds('x','y','z'): + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + i=0 + for example in ds('y','x'): + assert len(example)==2 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for val1,val2,val3 in dataset(field1, field2,field3): + i=0 + for x,y,z in ds('x','y','z'): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + i=0 + for y,x in ds('y','x',): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,i + + def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): + ##full minibatch or the last minibatch + for idx in range(nb_field): + test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) + del idx + def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): + assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)2: + ds[:1] + ds[1:1] + ds[1:1:1] + if len(ds)>5: + ds[[1,2,3]] + for x in ds: + pass + +#ds[:n] returns a dataset with the n first examples. + ds2=ds[:3] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,index=[0,1,2]) + del ds2 + +#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. + ds2=ds[1:7:2] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[1,3,5]) + del ds2 + +#ds[i] + ds2=ds[5] + assert isinstance(ds2,Example) + assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined + assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) + del ds2 + +#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. + ds2=ds[[4,7,2,8]] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[4,7,2,8]) + del ds2 + +#ds[fieldname]# an iterable over the values of the field fieldname across + #the ds (the iterable is obtained by default by calling valuesVStack + #over the values for individual examples). + assert have_raised("ds['h']") # h is not defined... + assert have_raised("ds[['x']]") # bad syntax + assert not have_raised("var['ds']['x']",ds=ds) + isinstance(ds['x'],DataSetFields) + ds2=ds['x'] + assert len(ds['x'])==10 + assert len(ds['y'])==10 + assert len(ds['z'])==10 + i=0 + for example in ds['x']: + assert (example==array[i][:3]).all() + i+=1 + i=0 + for example in ds['y']: + assert (example==array[i][3]).all() + i+=1 + i=0 + for example in ds['z']: + assert (example==array[i,0:3:2]).all() + i+=1 + del ds2,i + +#ds.# returns the value of a property associated with + #the name . The following properties should be supported: + # - 'description': a textual description or name for the ds + # - 'fieldtypes': a list of types (one per field) + +#* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? + #hstack([ds('x','y'),ds('z')] + #hstack([ds('z','y'),ds('x')] + #assert have_thrown("hstack([ds('x'),ds('x')]") + #assert not have_thrown("hstack([ds('x'),ds('x')]") + #accept_nonunique_names + #assert have_thrown("hstack([ds('y','x'),ds('x')]") +# i=0 +# for example in hstack([ds('x'),ds('y'),ds('z')]): +# example==ds[i] +# i+=1 +# del i,example +#* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? + + def test_ArrayDataSet(): #don't test stream #tested only with float value @@ -39,313 +335,18 @@ #don't test missing value #don't test with tuple #don't test proterties - def test_iterate_over_examples(array,ds): -#not in doc!!! - i=0 - for example in range(len(ds)): - assert (ds[example]['x']==a[example][:3]).all() - assert ds[example]['y']==a[example][3] - assert (ds[example]['z']==a[example][[0,2]]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for example in dataset: - i=0 - for example in ds: - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,... in dataset: - i=0 - for x,y,z in ds: - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - -# - for example in dataset(field1, field2,field3, ...): - i=0 - for example in ds('x','y','z'): - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - i=0 - for example in ds('y','x'): - assert len(example)==2 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,val3 in dataset(field1, field2,field3): - i=0 - for x,y,z in ds('x','y','z'): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - i=0 - for y,x in ds('y','x',): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,i - - def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): - ##full minibatch or the last minibatch - for idx in range(nb_field): - test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) - del idx - def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): - assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)2: - ds[:1] - ds[1:1] - ds[1:1:1] - if len(ds)>5: - ds[[1,2,3]] - for x in ds: - pass - - #ds[:n] returns a dataset with the n first examples. - ds2=ds[:3] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,index=[0,1,2]) - del ds2 - - #ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. - ds2=ds[1:7:2] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[1,3,5]) - del ds2 - - #ds[i] - ds2=ds[5] - assert isinstance(ds2,Example) - assert have_raised("ds["+str(len(ds))+"]") # index not defined - assert not have_raised("ds["+str(len(ds)-1)+"]") - del ds2 - - #ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. - ds2=ds[[4,7,2,8]] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[4,7,2,8]) - del ds2 - - #ds[fieldname]# an iterable over the values of the field fieldname across - #the ds (the iterable is obtained by default by calling valuesVStack - #over the values for individual examples). - assert have_raised("ds['h']") # h is not defined... - assert have_raised("ds[['x']]") # bad syntax - assert not have_raised("ds['x']") - isinstance(ds['x'],DataSetFields) - ds2=ds['x'] - assert len(ds['x'])==10 - assert len(ds['y'])==10 - assert len(ds['z'])==10 - i=0 - for example in ds['x']: - assert (example==a[i][:3]).all() - i+=1 - i=0 - for example in ds['y']: - assert (example==a[i][3]).all() - i+=1 - i=0 - for example in ds['z']: - assert (example==a[i,0:3:2]).all() - i+=1 - del ds2,i - - #ds.# returns the value of a property associated with - #the name . The following properties should be supported: - # - 'description': a textual description or name for the ds - # - 'fieldtypes': a list of types (one per field) - - #* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? - #hstack([ds('x','y'),ds('z')] - #hstack([ds('z','y'),ds('x')] - #assert have_thrown("hstack([ds('x'),ds('x')]") - #assert not have_thrown("hstack([ds('x'),ds('x')]") - #accept_nonunique_names - #assert have_thrown("hstack([ds('y','x'),ds('x')]") -# i=0 -# for example in hstack([ds('x'),ds('y'),ds('z')]): -# example==ds[i] -# i+=1 -# del i,example - #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? - - print "test_ArrayDataSet" - a = numpy.random.rand(10,4) - ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested - ds = ArrayDataSet(a,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + a2 = numpy.random.rand(10,4) + ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested + ds = ArrayDataSet(a2,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested assert len(ds)==10 #assert ds==a? should this work? - test_iterate_over_examples(a, ds) - test_getitem(a, ds) + test_iterate_over_examples(a2, ds) + test_getitem(a2, ds) # - for val1,val2,val3 in dataset(field1, field2,field3): - test_ds_iterator(a,ds('x','y'),ds('y','z'),ds('x','y','z')) + test_ds_iterator(a2,ds('x','y'),ds('y','z'),ds('x','y','z')) assert len(ds.fields())==3 @@ -380,7 +381,7 @@ example2 = LookupList(['v','w'], ['a','b']) example3 = LookupList(['x','y','z','u','v','w'], [[1, 2, 3],2,3,0,'a','b']) assert example+example2==example3 - assert have_raised("example+example") + assert have_raised("var['x']+var['x']",x=example) def test_ApplyFunctionDataSet(): print "test_ApplyFunctionDataSet"