# HG changeset patch # User James Bergstra # Date 1212782178 14400 # Node ID 174374d5940555ad068fc9d4b13cf4dfa147b733 # Parent 4e6b550fe131bc3b5d7027da11b93b7d916d38b7# Parent 2d08f46d17d8b2661615e26ae5a838ac2d70edb2 merge diff -r 4e6b550fe131 -r 174374d59405 _nnet_ops.py --- a/_nnet_ops.py Thu Jun 05 18:43:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ - -import unittest -import theano._test_tensor as TT -import numpy - -from nnet_ops import * - -class T_sigmoid(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test_elemwise(self): - TT.verify_grad(self, sigmoid, [numpy.random.rand(3,4)]) - -class T_softplus(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test_elemwise(self): - TT.verify_grad(self, softplus, [numpy.random.rand(3,4)]) - -class T_CrossentropySoftmax1Hot(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test0(self): - y_idx = [0,1,3] - class Dummy(object): - def make_node(self, a,b): - return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), - numpy.random.rand(4)]) - - def test1(self): - y_idx = [0,1,3] - class Dummy(object): - def make_node(self, a): - return crossentropy_softmax_1hot(a, y_idx)[0:1] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - - - -if __name__ == '__main__': - unittest.main() diff -r 4e6b550fe131 -r 174374d59405 _test_dataset.py --- a/_test_dataset.py Thu Jun 05 18:43:16 2008 -0400 +++ b/_test_dataset.py Fri Jun 06 15:56:18 2008 -0400 @@ -1,183 +1,442 @@ +#!/bin/env python from dataset import * from math import * -import unittest -import sys -import numpy as N +import numpy,unittest +from misc import * + +def have_raised(to_eval, **var): + have_thrown = False + try: + eval(to_eval) + except : + have_thrown = True + return have_thrown + +def have_raised2(f, *args, **kwargs): + have_thrown = False + try: + f(*args, **kwargs) + except : + have_thrown = True + return have_thrown + +def test1(): + print "test1" + global a,ds + a = numpy.random.rand(10,4) + print a + ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]}) + print "len(ds)=",len(ds) + assert(len(ds)==10) + print "example 0 = ",ds[0] +# assert + print "x=",ds["x"] + print "x|y" + for x,y in ds("x","y"): + print x,y + minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4) + minibatch = minibatch_iterator.__iter__().next() + print "minibatch=",minibatch + for var in minibatch: + print "var=",var + print "take a slice and look at field y",ds[1:6:2]["y"] + + del a,ds,x,y,minibatch_iterator,minibatch,var -def _sum_all(a): - s=a - while isinstance(s,numpy.ndarray): - s=sum(s) - return s - -class T_arraydataset(unittest.TestCase): - def setUp(self): - numpy.random.seed(123456) +def test_iterate_over_examples(array,ds): +#not in doc!!! + i=0 + for example in range(len(ds)): + assert (ds[example]['x']==array[example][:3]).all() + assert ds[example]['y']==array[example][3] + assert (ds[example]['z']==array[example][[0,2]]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for example in dataset: + i=0 + for example in ds: + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i +# - for val1,val2,... 
in dataset: + i=0 + for x,y,z in ds: + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + +# - for example in dataset(field1, field2,field3, ...): + i=0 + for example in ds('x','y','z'): + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + i=0 + for example in ds('y','x'): + assert len(example)==2 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i - def test_ctor_len(self): - n = numpy.random.rand(8,3) - a=ArrayDataSet(n) - self.failUnless(a.data is n) - self.failUnless(a.fields is None) +# - for val1,val2,val3 in dataset(field1, field2,field3): + i=0 + for x,y,z in ds('x','y','z'): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + i=0 + for y,x in ds('y','x',): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,i - self.failUnless(len(a) == n.shape[0]) - self.failUnless(a[0].shape == (n.shape[1],)) + def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): + ##full minibatch or the last minibatch + for idx in range(nb_field): + test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) + del idx + def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): + assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)2: + ds[:1] + ds[1:1] + ds[1:1:1] + if len(ds)>5: + ds[[1,2,3]] + for x in ds: + pass - a=ArrayDataSet(data=arr,fields={"x":slice(2),"y":slice(1,4)}) +#ds[:n] returns a dataset with the n first examples. + ds2=ds[:3] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,index=[0,1,2]) + del ds2 + +#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. + ds2=ds[1:7:2] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[1,3,5]) + del ds2 + +#ds[i] + ds2=ds[5] + assert isinstance(ds2,Example) + assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined + assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) + del ds2 + +#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. + ds2=ds[[4,7,2,8]] + assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[4,7,2,8]) + del ds2 - #print arr - for i, x in enumerate(a.minibatches(["x"], minibatch_size=2, n_batches=8)): - #print 'x' , x - self.failUnless(numpy.all( x == arr2[i*2:i*2+2,0:2])) + #ds.# returns the value of a property associated with + #the name . The following properties should be supported: + # - 'description': a textual description or name for the ds + # - 'fieldtypes': a list of types (one per field) - def test_minibatch_wraparound_odd(self): - arr = numpy.random.rand(10,4) - arr2 = ArrayDataSet.Iterator.matcat(arr,arr) + #* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? 
+ #assert hstack([ds('x','y'),ds('z')])==ds + #hstack([ds('z','y'),ds('x')])==ds + assert have_raised2(hstack,[ds('x'),ds('x')]) + assert have_raised2(hstack,[ds('y','x'),ds('x')]) + assert not have_raised2(hstack,[ds('x'),ds('y')]) + + # i=0 + # for example in hstack([ds('x'),ds('y'),ds('z')]): + # example==ds[i] + # i+=1 + # del i,example + #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? - a=ArrayDataSet(data=arr,fields={"x":slice(2),"y":slice(1,4)}) - - for i, x in enumerate(a.minibatches(["x"], minibatch_size=3, n_batches=6)): - self.failUnless(numpy.all( x == arr2[i*3:i*3+3,0:2])) +def test_fields_fct(ds): + #@todo, fill correctly + assert len(ds.fields())==3 + i=0 + v=0 + for field in ds.fields(): + for field_value in field: # iterate over the values associated to that field for all the ds examples + v+=1 + i+=1 + assert i==3 + assert v==3*10 + del i,v + + i=0 + v=0 + for field in ds('x','z').fields(): + i+=1 + for val in field: + v+=1 + assert i==2 + assert v==2*10 + del i,v + + i=0 + v=0 + for field in ds.fields('x','y'): + i+=1 + for val in field: + v+=1 + assert i==2 + assert v==2*10 + del i,v + i=0 + v=0 + for field_examples in ds.fields(): + for example_value in field_examples: + v+=1 + i+=1 + assert i==3 + assert v==3*10 + del i,v + + assert ds == ds.fields().examples() + assert len(ds('x','y').fields()) == 2 + assert len(ds('x','z').fields()) == 2 + assert len(ds('y').fields()) == 1 -class T_renamingdataset(unittest.TestCase): - def setUp(self): - numpy.random.seed(123456) + del field +def test_all(array,ds): + assert len(ds)==10 + + test_iterate_over_examples(array, ds) + test_getitem(array, ds) + test_ds_iterator(array,ds('x','y'),ds('y','z'),ds('x','y','z')) + test_fields_fct(ds) + +class T_DataSet(unittest.TestCase): + def test_ArrayDataSet(self): + #don't test stream + #tested only with float value + #don't always test with y + #don't test missing value + #don't test with tuple + #don't test proterties + a2 = numpy.random.rand(10,4) + ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested + ds = ArrayDataSet(a2,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + #assert ds==a? should this work? 
+ + test_all(a2,ds) + + del a2, ds + + def test_CachedDataSet(self): + a = numpy.random.rand(10,4) + ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + ds2 = CachedDataSet(ds1) + ds3 = CachedDataSet(ds1,cache_all_upon_construction=True) + + test_all(a,ds2) + test_all(a,ds3) + + del a,ds1,ds2,ds3 - def test_hasfield(self): - n = numpy.random.rand(3,8) - a=ArrayDataSet(data=n,fields={"x":slice(2),"y":slice(1,4),"z":slice(4,6)}) - b=a.rename({'xx':'x','zz':'z'}) - self.failUnless(b.hasFields('xx','zz') and not b.hasFields('x') and not b.hasFields('y')) + def test_DataSetFields(self): + raise NotImplementedError() -class T_applyfunctiondataset(unittest.TestCase): - def setUp(self): - numpy.random.seed(123456) + def test_ApplyFunctionDataSet(self): + a = numpy.random.rand(10,4) + a2 = a+1 + ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + + ds2 = ApplyFunctionDataSet(ds1,lambda x,y,z: (x+1,y+1,z+1), ['x','y','z'],minibatch_mode=False) + ds3 = ApplyFunctionDataSet(ds1,lambda x,y,z: (numpy.array(x)+1,numpy.array(y)+1,numpy.array(z)+1), + ['x','y','z'], + minibatch_mode=True) - def test_function(self): - n = numpy.random.rand(3,8) - a=ArrayDataSet(data=n,fields={"x":slice(2),"y":slice(1,4),"z":slice(4,6)}) - b=a.apply_function(lambda x,y: x+y,x+1, ['x','y'], ['x+y','x+1'], False,False,False) - print b.fieldNames() - print b('x+y') - + test_all(a2,ds2) + test_all(a2,ds3) + del a,ds1,ds2,ds3 - -# to be used with a any new dataset -class T_dataset_tester(object): - """ - This class' goal is to test any new dataset that is created - Tests are (will be!) designed to check the normal behaviours - of a dataset, as defined in dataset.py - """ + def test_FieldsSubsetDataSet(self): + raise NotImplementedError() + def test_MinibatchDataSet(self): + raise NotImplementedError() + def test_HStackedDataSet(self): + raise NotImplementedError() + def test_VStackedDataSet(self): + raise NotImplementedError() + def test_ArrayFieldsDataSet(self): + raise NotImplementedError() - def __init__(self,ds,runall=True) : - """if interested in only a subset of test, init with runall=False""" - self.ds = ds - - if runall : - self.test1_basicstats(ds) - self.test2_slicing(ds) - self.test3_fields_iterator_consistency(ds) - - def test1_basicstats(self,ds) : - """print basics stats on a dataset, like length""" - - print 'len(ds) = ',len(ds) - print 'num fields = ', len(ds.fieldNames()) - print 'types of field: ', - for k in ds.fieldNames() : - print type(ds[0](k)[0]), - print '' +if __name__=='__main__': + unittest.main() - def test2_slicing(self,ds) : - """test if slicing works properly""" - print 'testing slicing...', - sys.stdout.flush() - - middle = len(ds) / 2 - tenpercent = int(len(ds) * .1) - set1 = ds[:middle+tenpercent] - set2 = ds[middle-tenpercent:] - for k in range(tenpercent + tenpercent -1): - for k2 in ds.fieldNames() : - if type(set1[middle-tenpercent+k](k2)[0]) == N.ndarray : - for k3 in range(len(set1[middle-tenpercent+k](k2)[0])) : - assert set1[middle-tenpercent+k](k2)[0][k3] == set2[k](k2)[0][k3] - else : - assert set1[middle-tenpercent+k](k2)[0] == set2[k](k2)[0] - assert tenpercent > 1 - set3 = ds[middle-tenpercent:middle+tenpercent:2] - for k2 in ds.fieldNames() : - if type(set2[2](k2)[0]) == N.ndarray : - for k3 in range(len(set2[2](k2)[0])) : - assert set2[2](k2)[0][k3] == set3[1](k2)[0][k3] - else : - assert set2[2](k2)[0] == set3[1](k2)[0] - - print 'done' - - - def test3_fields_iterator_consistency(self,ds) : - """ check if the 
number of iterator corresponds to the number of fields""" - print 'testing fields/iterator consistency...', - sys.stdout.flush() - - # basic test - maxsize = min(len(ds)-1,100) - for iter in ds[:maxsize] : - assert len(iter) == len(ds.fieldNames()) - if len(ds.fieldNames()) == 1 : - print 'done' - return - - # with minibatches iterator - ds2 = ds.minibatches[:maxsize]([ds.fieldNames()[0],ds.fieldNames()[1]],minibatch_size=2) - for iter in ds2 : - assert len(iter) == 2 - - print 'done' - - - - - -################################################################### -# main -if __name__ == '__main__': - unittest.main() - diff -r 4e6b550fe131 -r 174374d59405 _test_filetensor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test_filetensor.py Fri Jun 06 15:56:18 2008 -0400 @@ -0,0 +1,116 @@ +from filetensor import * +import filetensor + +import unittest +import os + +class T(unittest.TestCase): + fname = '/tmp/some_mat' + + def setUp(self): + #TODO: test that /tmp/some_mat does not exist + try: + os.stat(self.fname) + except OSError: + return #assume file was not found + raise Exception('autotest file "%s" exists!' % self.fname) + + def tearDown(self): + os.remove(self.fname) + + def test_file(self): + gen = numpy.random.rand(1) + f = file(self.fname, 'w'); + write(f, gen) + f.flush() + f = file(self.fname, 'r'); + mat = read(f, None, debug=False) #load from filename + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def test_filename(self): + gen = numpy.random.rand(1) + write(self.fname, gen) + mat = read(self.fname, None, debug=False) #load from filename + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def testNd(self): + """shape and values are stored correctly for tensors of rank 0 to 5""" + whole_shape = [5, 6, 7, 8, 9] + for i in xrange(5): + gen = numpy.asarray(numpy.random.rand(*whole_shape[:i])) + f = file(self.fname, 'w'); + write(f, gen) + f.flush() + f = file(self.fname, 'r'); + mat = read(f, None, debug=False) #load from filename + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def test_dtypes(self): + """shape and values are stored correctly for all dtypes """ + for dtype in filetensor._dtype_magic: + gen = numpy.asarray( + numpy.random.rand(4, 5, 2, 1) * 100, + dtype=dtype) + f = file(self.fname, 'w'); + write(f, gen) + f.flush() + f = file(self.fname, 'r'); + mat = read(f, None, debug=False) #load from filename + self.failUnless(gen.dtype == mat.dtype) + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def test_dtype_invalid(self): + gen = numpy.zeros((3,4), dtype='uint16') #an unsupported dtype + f = file(self.fname, 'w') + passed = False + try: + write(f, gen) + except TypeError, e: + if e[0].startswith('Invalid ndarray dtype'): + passed = True + f.close() + self.failUnless(passed) + + +if __name__ == '__main__': + unittest.main() + + #a small test script, starts by reading sys.argv[1] + #print 'rval', rval.shape, rval.size + + if 0: + write(f, rval) + print '' + f.close() + f = file('/tmp/some_mat', 'r'); + rval2 = read(f) #load from file handle + print 'rval2', rval2.shape, rval2.size + + assert rval.dtype == rval2.dtype + assert rval.shape == rval2.shape + assert numpy.all(rval == rval2) + print 'ok' + + def _unused(): + f.seek(0,2) #seek to end + f_len = f.tell() + f.seek(f_data_start,0) #seek back to where we were + + if debug: print 'length:', f_len + + + f_data_bytes = (f_len - f_data_start) + + if debug: print 'data 
bytes according to header: ', dim_size * elsize + if debug: print 'data bytes according to file : ', f_data_bytes + + if debug: print 'reading data...' + sys.stdout.flush() + + def read_ndarray(f, dim, dtype): + return numpy.fromfile(f, dtype=dtype, count=_prod(dim)).reshape(dim) + diff -r 4e6b550fe131 -r 174374d59405 _test_lookup_list.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test_lookup_list.py Fri Jun 06 15:56:18 2008 -0400 @@ -0,0 +1,24 @@ +from lookup_list import * +import unittest + +class T_LookUpList(unittest.TestCase): + def test_LookupList(self): + #test only the example in the doc??? + example = LookupList(['x','y','z'],[1,2,3]) + example['x'] = [1, 2, 3] # set or change a field + x, y, z = example + x = example[0] + x = example["x"] + assert example.keys()==['x','y','z'] + assert example.values()==[[1,2,3],2,3] + assert example.items()==[('x',[1,2,3]),('y',2),('z',3)] + example.append_keyval('u',0) # adds item with name 'u' and value 0 + assert len(example)==4 # number of items = 4 here + example2 = LookupList(['v','w'], ['a','b']) + example3 = LookupList(['x','y','z','u','v','w'], [[1, 2, 3],2,3,0,'a','b']) + assert example+example2==example3 + self.assertRaises(AssertionError,example.__add__,example) + del example, example2, example3, x, y ,z + +if __name__=='__main__': + unittest.main() diff -r 4e6b550fe131 -r 174374d59405 _test_nnet_ops.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test_nnet_ops.py Fri Jun 06 15:56:18 2008 -0400 @@ -0,0 +1,41 @@ + +import unittest +import theano._test_tensor as TT +import numpy + +from nnet_ops import * + +class T_sigmoid(unittest.TestCase): + def setUp(self): + numpy.random.seed(9999) + def test_elemwise(self): + TT.verify_grad(self, sigmoid, [numpy.random.rand(3,4)]) + +class T_softplus(unittest.TestCase): + def setUp(self): + numpy.random.seed(9999) + def test_elemwise(self): + TT.verify_grad(self, softplus, [numpy.random.rand(3,4)]) + +class T_CrossentropySoftmax1Hot(unittest.TestCase): + def setUp(self): + numpy.random.seed(9999) + def test0(self): + y_idx = [0,1,3] + class Dummy(object): + def make_node(self, a,b): + return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1] + TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), + numpy.random.rand(4)]) + + def test1(self): + y_idx = [0,1,3] + class Dummy(object): + def make_node(self, a): + return crossentropy_softmax_1hot(a, y_idx)[0:1] + TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) + + + +if __name__ == '__main__': + unittest.main() diff -r 4e6b550fe131 -r 174374d59405 autotest.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/autotest.py Fri Jun 06 15:56:18 2008 -0400 @@ -0,0 +1,54 @@ +import unittest, os, sys, traceback + +def test_root_dir(debugmode=False): + suite = None + filenames = os.listdir('.') + for filename in filenames: + if filename[-3:] == '.py' and filename.startswith('_test'): + #print >>sys.stderr, 'Loading', modname + modname = filename[0:-3] + + try: + module = __import__(modname) + except Exception, e: + print >>sys.stderr, "====================================================" + print >>sys.stderr, "Failed to load %s.py" % modname + print >>sys.stderr, "====================================================" + traceback.print_exc() + print >>sys.stderr, "====================================================" + continue + + tests = unittest.TestLoader().loadTestsFromModule(module) + if tests.countTestCases() > 0: + print >>sys.stderr, 'Testing', modname + if suite is None: + suite = tests + else: + suite.addTests(tests) + 
if suite is None: + print >>sys.stderr, "No suite found" + sys.exit(1) + if debugmode: + suite.debug() + else: + unittest.TextTestRunner(verbosity=1).run(suite) + +if __name__ == '__main__': + + def printUsage(): + print >>sys.stderr, "Bad argument: ",sys.argv + print >>sys.stderr, "only --debug is supported" + sys.exit(1) + debugparam="" + + if len(sys.argv)==2: + if sys.argv[1]=="--debug": + debugparam="--debug" + sys.argv.remove(debugparam) + else: + printUsage() + elif len(sys.argv)>2: + printUsage() + + test_root_dir(debugparam!="") + diff -r 4e6b550fe131 -r 174374d59405 dataset.py --- a/dataset.py Thu Jun 05 18:43:16 2008 -0400 +++ b/dataset.py Fri Jun 06 15:56:18 2008 -0400 @@ -161,17 +161,55 @@ numpy_vstack = lambda fieldname,values: numpy.vstack(values) numpy_hstack = lambda fieldnames,values: numpy.hstack(values) - def __init__(self,description=None,fieldtypes=None): - if description is None: - # by default return "(,,...)" - description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" - self.description=description - self.fieldtypes=fieldtypes + def __init__(self, description=None, fieldnames=None, fieldtypes=None): + """ + @type fieldnames: list of strings + @type fieldtypes: list of python types, same length as fieldnames + @type description: string + @param description: description/name for this dataset + """ + def default_desc(): + return type(self).__name__ \ + + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" + + #self.fieldnames = fieldnames + + self.fieldtypes = fieldtypes if fieldtypes is not None \ + else [None]*1 #len(fieldnames) + + self.description = default_desc() if description is None \ + else description self._attribute_names = ["description"] - if fieldtypes: - self._attribute_names.append("fieldtypes") + + attributeNames = property(lambda self: copy.copy(self._attribute_names)) + + def __contains__(self, fieldname): + return (fieldname in self.fieldNames()) \ + or (fieldname in self.attributeNames()) + + def __iter__(self): + """Supports the syntax "for i in dataset: ..." - def attributeNames(self): return self._attribute_names + Using this syntax, "i" will be an Example instance (or equivalent) with + all the fields of DataSet self. Every field of "i" will give access to + a field of a single example. Fields should be accessible via + i["fielname"] or i[3] (in the order defined by the elements of the + Example returned by this iterator), but the derived class is free + to accept any type of identifier, and add extra functionality to the iterator. + + The default implementation calls the minibatches iterator and extracts the first example of each field. + """ + return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) + + def __len__(self): + """ + len(dataset) returns the number of examples in the dataset. + By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). + Sub-classes which implement finite-length datasets should redefine this method. + Some methods only make sense for finite-length datasets. + """ + return None + class MinibatchToSingleExampleIterator(object): """ @@ -198,24 +236,6 @@ def next_index(self): return self.minibatch_iterator.next_index() - def __iter__(self): - """Supports the syntax "for i in dataset: ..." - - Using this syntax, "i" will be an Example instance (or equivalent) with - all the fields of DataSet self. Every field of "i" will give access to - a field of a single example. 
Fields should be accessible via - i["fielname"] or i[3] (in the order defined by the elements of the - Example returned by this iterator), but the derived class is free - to accept any type of identifier, and add extra functionality to the iterator. - - The default implementation calls the minibatches iterator and extracts the first example of each field. - """ - return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) - - def __contains__(self, fieldname): - return (fieldname in self.fieldNames()) \ - or (fieldname in self.attributeNames()) - class MinibatchWrapAroundIterator(object): """ An iterator for minibatches that handles the case where we need to wrap around the @@ -358,15 +378,6 @@ """ raise AbstractFunction() - def __len__(self): - """ - len(dataset) returns the number of examples in the dataset. - By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). - Sub-classes which implement finite-length datasets should redefine this method. - Some methods only make sense for finite-length datasets. - """ - return maxint - def is_unbounded(self): """ Tests whether a dataset is unbounded (e.g. a stream). diff -r 4e6b550fe131 -r 174374d59405 learner.py --- a/learner.py Thu Jun 05 18:43:16 2008 -0400 +++ b/learner.py Fri Jun 06 15:56:18 2008 -0400 @@ -110,7 +110,7 @@ """ raise AbstractFunction() -class LearnerModel(LearnedModel): +class LearnerModel(TrainedModel): """ LearnerModel is a base class for models returned by instances of a LearningAlgorithm subclass. It is only given here to define the expected semantics. diff -r 4e6b550fe131 -r 174374d59405 mlp.py --- a/mlp.py Thu Jun 05 18:43:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,240 +0,0 @@ -""" -A straightforward classicial feedforward -one-hidden-layer neural net, with L2 regularization. -This is one of the simplest example of L{Learner}, and illustrates -the use of theano. 
-""" - -from learner import * -from theano import tensor as t -from nnet_ops import * -import math -from misc import * - -def function(inputs, outputs, linker='c&py'): - return theano.function(inputs, outputs, unpack_single=False,linker=linker) - -def randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 - -class ManualNNet(object): - def __init__(self, ninputs, nhid, nclass, lr, nepochs, - linker='c&yp', - hidden_layer=None): - class Vars: - def __init__(self, lr, l2coef=0.0): - lr = t.constant(lr) - l2coef = t.constant(l2coef) - input = t.matrix('input') # n_examples x n_inputs - target = t.ivector('target') # n_examples x 1 - W2 = t.matrix('W2') - b2 = t.vector('b2') - - if hidden_layer: - hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) - else: - W1 = t.matrix('W1') - b1 = t.vector('b1') - hid = t.tanh(b1 + t.dot(input, W1)) - hid_params = [W1, b1] - hid_regularization = l2coef * t.sum(W1*W1) - hid_ivals = [randshape(ninputs, nhid), randshape(nhid)] - - params = [W2, b2] + hid_params - ivals = [randshape(nhid, nclass), randshape(nclass)]\ - + hid_ivals - nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) - regularization = l2coef * t.sum(W2*W2) + hid_regularization - output_class = t.argmax(predictions,1) - loss_01 = t.neq(output_class, target) - g_params = t.grad(nll + regularization, params) - new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] - self.__dict__.update(locals()); del self.self - self.nhid = nhid - self.nclass = nclass - self.nepochs = nepochs - self.v = Vars(lr) - self.params = None - - def update(self, trainset): - params = self.v.ivals - update_fn = function( - [self.v.input, self.v.target] + self.v.params, - [self.v.nll] + self.v.new_params) - for i in xrange(self.nepochs): - for input, target in trainset.minibatches(['input', 'target'], - minibatch_size=min(32, len(trainset))): - dummy = update_fn(input, target[:,0], *params) - if 0: print dummy[0] #the nll - return self.use - __call__ = update - - def use(self, dset, - output_fieldnames=['output_class'], - test_stats_collector=None, - copy_inputs=False, - put_stats_in_output_dataset=True, - output_attributes=[]): - inputs = [self.v.input, self.v.target] + self.v.params - fn = function(inputs, [getattr(self.v, name) for name in output_fieldnames]) - target = dset.fields()['target'] if ('target' in dset.fields()) else numpy.zeros((1,1),dtype='int64') - return ApplyFunctionDataSet(dset, - lambda input, target: fn(input, target[:,0], *self.v.ivals), - output_fieldnames) - - -class OneHiddenLayerNNetClassifier(OnlineGradientTLearner): - """ - Implement a straightforward classicial feedforward - one-hidden-layer neural net, with L2 regularization. - - The predictor parameters are obtained by minibatch/online gradient descent. - Training can proceed sequentially (with multiple calls to update with - different disjoint subsets of the training sets). 
- - Hyper-parameters: - - L2_regularizer - - learning_rate - - n_hidden - - For each (input_t,output_t) pair in a minibatch,:: - - output_activations_t = b2+W2*tanh(b1+W1*input_t) - output_t = softmax(output_activations_t) - output_class_t = argmax(output_activations_t) - class_error_t = 1_{output_class_t != target_t} - nll_t = -log(output_t[target_t]) - - and the training criterion is:: - - loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t - - The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by - stochastic minibatch gradient descent:: - - parameters[i] -= learning_rate * dloss/dparameters[i] - - The fields and attributes expected and produced by use and update are the following: - - - Input and output fields (example-wise quantities): - - - 'input' (always expected by use and update) - - 'target' (optionally expected by use and always by update) - - 'output' (optionally produced by use) - - 'output_class' (optionally produced by use) - - 'class_error' (optionally produced by use) - - 'nll' (optionally produced by use) - - - optional attributes (optionally expected as input_dataset attributes) - (warning, this may be dangerous, the 'use' method will use those provided in the - input_dataset rather than those learned during 'update'; currently no support - for providing these to update): - - - 'L2_regularizer' - - 'b1' - - 'W1' - - 'b2' - - 'W2' - - 'parameters' = [b1, W1, b2, W2] - - 'regularization_term' - - """ - def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'): - self._n_inputs = n_inputs - self._n_outputs = n_classes - self._n_hidden = n_hidden - self._init_range = init_range - self._max_n_epochs = max_n_epochs - self._minibatch_size = minibatch_size - self.learning_rate = learning_rate # this is the float - self.L2_regularizer = L2_regularizer - self._learning_rate = t.scalar('learning_rate') # this is the symbol - self._input = t.matrix('input') # n_examples x n_inputs - self._target = t.lmatrix('target') # n_examples x 1 - self._target_vector = self._target[:,0] - self._L2_regularizer = t.scalar('L2_regularizer') - self._W1 = t.matrix('W1') - self._W2 = t.matrix('W2') - self._b1 = t.row('b1') - self._b2 = t.row('b2') - self._regularization_term = self._L2_regularizer * (t.sum(self._W1*self._W1) + t.sum(self._W2*self._W2)) - self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T) - self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target_vector) - self._output_class = t.argmax(self._output,1) - self._class_error = t.neq(self._output_class,self._target_vector) - self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0] - OnlineGradientTLearner.__init__(self, linker = linker) - - def attributeNames(self): - return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"] - - def parameterAttributes(self): - return ["b1","W1", "b2", "W2"] - - def updateMinibatchInputFields(self): - return ["input","target"] - - def updateMinibatchInputAttributes(self): - return OnlineGradientTLearner.updateMinibatchInputAttributes(self)+["L2_regularizer"] - - def updateEndOutputAttributes(self): - return ["regularization_term"] - - def lossAttribute(self): - return "minibatch_criterion" - - def defaultOutputFields(self, input_fields): - output_fields = ["output", "output_class",] - if "target" in input_fields: - output_fields += ["class_error", "nll"] - 
return output_fields - - def updateMinibatch(self,minibatch): - MinibatchUpdatesTLearner.updateMinibatch(self,minibatch) - #print self.nll - - def allocate(self,minibatch): - minibatch_n_inputs = minibatch["input"].shape[1] - if not self._n_inputs: - self._n_inputs = minibatch_n_inputs - self.b1 = numpy.zeros((1,self._n_hidden)) - self.b2 = numpy.zeros((1,self._n_outputs)) - self.forget() - elif self._n_inputs!=minibatch_n_inputs: - # if the input changes dimension on the fly, we resize and forget everything - self.forget() - - def forget(self): - if self._n_inputs: - r = self._init_range/math.sqrt(self._n_inputs) - self.W1 = numpy.random.uniform(low=-r,high=r, - size=(self._n_hidden,self._n_inputs)) - r = self._init_range/math.sqrt(self._n_hidden) - self.W2 = numpy.random.uniform(low=-r,high=r, - size=(self._n_outputs,self._n_hidden)) - self.b1[:]=0 - self.b2[:]=0 - self._n_epochs=0 - - def isLastEpoch(self): - self._n_epochs +=1 - return self._n_epochs>=self._max_n_epochs - - def debug_updateMinibatch(self,minibatch): - # make sure all required fields are allocated and initialized - self.allocate(minibatch) - input_attributes = self.names2attributes(self.updateMinibatchInputAttributes()) - input_fields = minibatch(*self.updateMinibatchInputFields()) - print 'input attributes', input_attributes - print 'input fields', input_fields - results = self.update_minibatch_function(*(input_attributes+input_fields)) - print 'output attributes', self.updateMinibatchOutputAttributes() - print 'results', results - self.setAttributes(self.updateMinibatchOutputAttributes(), - results) - - if 0: - print 'n0', self.names2OpResults(self.updateMinibatchOutputAttributes()+ self.updateMinibatchInputFields()) - print 'n1', self.names2OpResults(self.updateMinibatchOutputAttributes()) - print 'n2', self.names2OpResults(self.updateEndInputAttributes()) - print 'n3', self.names2OpResults(self.updateEndOutputAttributes()) - diff -r 4e6b550fe131 -r 174374d59405 test_dataset.py --- a/test_dataset.py Thu Jun 05 18:43:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,561 +0,0 @@ -#!/bin/env python -from dataset import * -from math import * -import numpy -from misc import * - -def have_raised(to_eval, **var): - have_thrown = False - try: - eval(to_eval) - except : - have_thrown = True - return have_thrown - -def have_raised2(f, *args, **kwargs): - have_thrown = False - try: - f(*args, **kwargs) - except : - have_thrown = True - return have_thrown - -def test1(): - print "test1" - global a,ds - a = numpy.random.rand(10,4) - print a - ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]}) - print "len(ds)=",len(ds) - assert(len(ds)==10) - print "example 0 = ",ds[0] -# assert - print "x=",ds["x"] - print "x|y" - for x,y in ds("x","y"): - print x,y - minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4) - minibatch = minibatch_iterator.__iter__().next() - print "minibatch=",minibatch - for var in minibatch: - print "var=",var - print "take a slice and look at field y",ds[1:6:2]["y"] - - del a,ds,x,y,minibatch_iterator,minibatch,var - -def test_iterate_over_examples(array,ds): -#not in doc!!! 
- i=0 - for example in range(len(ds)): - assert (ds[example]['x']==array[example][:3]).all() - assert ds[example]['y']==array[example][3] - assert (ds[example]['z']==array[example][[0,2]]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for example in dataset: - i=0 - for example in ds: - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,... in dataset: - i=0 - for x,y,z in ds: - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - -# - for example in dataset(field1, field2,field3, ...): - i=0 - for example in ds('x','y','z'): - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - i=0 - for example in ds('y','x'): - assert len(example)==2 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,val3 in dataset(field1, field2,field3): - i=0 - for x,y,z in ds('x','y','z'): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - i=0 - for y,x in ds('y','x',): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,i - - def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): - ##full minibatch or the last minibatch - for idx in range(nb_field): - test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) - del idx - def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): - assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)2: - ds[:1] - ds[1:1] - ds[1:1:1] - if len(ds)>5: - ds[[1,2,3]] - for x in ds: - pass - -#ds[:n] returns a dataset with the n first examples. - ds2=ds[:3] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,index=[0,1,2]) - del ds2 - -#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. - ds2=ds[1:7:2] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[1,3,5]) - del ds2 - -#ds[i] - ds2=ds[5] - assert isinstance(ds2,Example) - if 0: - # see ticket #27 - assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined - assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) - del ds2 - -#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. - ds2=ds[[4,7,2,8]] - assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[4,7,2,8]) - del ds2 - -#ds[fieldname]# an iterable over the values of the field fieldname across - #the ds (the iterable is obtained by default by calling valuesVStack - #over the values for individual examples). - if 0: - assert have_raised("ds['h']") # h is not defined... 
- assert have_raised("ds[['x']]") # bad syntax - assert not have_raised("var['ds']['x']",ds=ds) - isinstance(ds['x'],DataSetFields) - ds2=ds['x'] - assert len(ds['x'])==10 - assert len(ds['y'])==10 - assert len(ds['z'])==10 - i=0 - for example in ds['x']: - assert (example==array[i][:3]).all() - i+=1 - assert i==len(ds) - i=0 - for example in ds['y']: - assert (example==array[i][3]).all() - i+=1 - assert i==len(ds) - i=0 - for example in ds['z']: - assert (example==array[i,0:3:2]).all() - i+=1 - assert i==len(ds) - del ds2,i - else: - print 'warning: ds[fieldname] is deprecated... Fred could you fix this test?' - - #ds.# returns the value of a property associated with - #the name . The following properties should be supported: - # - 'description': a textual description or name for the ds - # - 'fieldtypes': a list of types (one per field) - - #* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? - #assert hstack([ds('x','y'),ds('z')])==ds - #hstack([ds('z','y'),ds('x')])==ds - assert have_raised2(hstack,[ds('x'),ds('x')]) - assert have_raised2(hstack,[ds('y','x'),ds('x')]) - assert not have_raised2(hstack,[ds('x'),ds('y')]) - - # i=0 - # for example in hstack([ds('x'),ds('y'),ds('z')]): - # example==ds[i] - # i+=1 - # del i,example - #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? - -def test_fields_fct(ds): - #@todo, fill correctly - assert len(ds.fields())==3 - i=0 - v=0 - for field in ds.fields(): - for field_value in field: # iterate over the values associated to that field for all the ds examples - v+=1 - i+=1 - assert i==3 - assert v==3*10 - del i,v - - i=0 - v=0 - for field in ds('x','z').fields(): - i+=1 - for val in field: - v+=1 - assert i==2 - assert v==2*10 - del i,v - - i=0 - v=0 - for field in ds.fields('x','y'): - i+=1 - for val in field: - v+=1 - assert i==2 - assert v==2*10 - del i,v - - i=0 - v=0 - for field_examples in ds.fields(): - for example_value in field_examples: - v+=1 - i+=1 - assert i==3 - assert v==3*10 - del i,v - - assert ds == ds.fields().examples() - assert len(ds('x','y').fields()) == 2 - assert len(ds('x','z').fields()) == 2 - assert len(ds('y').fields()) == 1 - - del field -def test_all(array,ds): - assert len(ds)==10 - - test_iterate_over_examples(array, ds) - test_getitem(array, ds) - test_ds_iterator(array,ds('x','y'),ds('y','z'),ds('x','y','z')) - test_fields_fct(ds) - -def test_ArrayDataSet(): - #don't test stream - #tested only with float value - #don't always test with y - #don't test missing value - #don't test with tuple - #don't test proterties - print "test_ArrayDataSet" - a2 = numpy.random.rand(10,4) - ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested - ds = ArrayDataSet(a2,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested - #assert ds==a? should this work? 
- - test_all(a2,ds) - - del a2, ds - -def test_CachedDataSet(): - print "test_CacheDataSet" - a = numpy.random.rand(10,4) - ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested - ds2 = CachedDataSet(ds1) - ds3 = CachedDataSet(ds1,cache_all_upon_construction=True) - - test_all(a,ds2) - test_all(a,ds3) - - del a,ds1,ds2,ds3 - - -def test_DataSetFields(): - print "test_DataSetFields" - raise NotImplementedError() - -def test_ApplyFunctionDataSet(): - print "test_ApplyFunctionDataSet" - a = numpy.random.rand(10,4) - a2 = a+1 - ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested - - ds2 = ApplyFunctionDataSet(ds1,lambda x,y,z: (x+1,y+1,z+1), ['x','y','z'],minibatch_mode=False) - - print ds1.fields('x', 'y', 'z') - print ' ' - print ds2.fields('x', 'y', 'z') - print '----------- ' - - - ds3 = ApplyFunctionDataSet(ds1,lambda x,y,z: (numpy.array(x)+1,numpy.array(y)+1,numpy.array(z)+1), - ['x','y','z'], - minibatch_mode=True) - - test_all(a2,ds2) - test_all(a2,ds3) - - del a,ds1,ds2,ds3 - -def test_FieldsSubsetDataSet(): - print "test_FieldsSubsetDataSet" - raise NotImplementedError() -def test_MinibatchDataSet(): - print "test_MinibatchDataSet" - raise NotImplementedError() -def test_HStackedDataSet(): - print "test_HStackedDataSet" - raise NotImplementedError() -def test_VStackedDataSet(): - print "test_VStackedDataSet" - raise NotImplementedError() -def test_ArrayFieldsDataSet(): - print "test_ArrayFieldsDataSet" - raise NotImplementedError() - - -def test_speed(array, ds): - print "test_speed", ds.__class__ - - mat = numpy.random.rand(400,100) - - @print_timing - def f_array_full(a): - a+1 - @print_timing - def f_array_index(a): - for id in range(a.shape[0]): -# pass - a[id]+1 -# a[id]*mat - @print_timing - def f_array_iter(a): - for r in a: -# pass - r+1 -# r*mat - @print_timing - def f_ds_index(ds): - for id in range(len(ds)): -# pass - ds[id][0]+1 -# ds[id][0]*mat - @print_timing - def f_ds_iter(ds): - for ex in ds: -# pass - ex[0]+1 -# a[0]*mat - @print_timing - def f_ds_mb1(ds,mb_size): - for exs in ds.minibatches(minibatch_size = mb_size): - for ex in exs: -# pass - ex[0]+1 -# ex[0]*mat - @print_timing - def f_ds_mb2(ds,mb_size): - for exs in ds.minibatches(minibatch_size = mb_size): -# pass - exs[0]+1 -# ex[0]*mat - - f_array_full(array) - f_array_index(array) - f_array_iter(array) - - f_ds_index(ds) - f_ds_iter(ds) - - f_ds_mb1(ds,10) - f_ds_mb1(ds,100) - f_ds_mb1(ds,1000) - f_ds_mb1(ds,10000) - f_ds_mb2(ds,10) - f_ds_mb2(ds,100) - f_ds_mb2(ds,1000) - f_ds_mb2(ds,10000) - - -if __name__=='__main__': - test_ArrayDataSet() - #test_CachedDataSet() - #test_ApplyFunctionDataSet() - diff -r 4e6b550fe131 -r 174374d59405 test_filetensor.py --- a/test_filetensor.py Thu Jun 05 18:43:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,116 +0,0 @@ -from filetensor import * -import filetensor - -import unittest -import os - -class T(unittest.TestCase): - fname = '/tmp/some_mat' - - def setUp(self): - #TODO: test that /tmp/some_mat does not exist - try: - os.stat(self.fname) - except OSError: - return #assume file was not found - raise Exception('autotest file "%s" exists!' 
% self.fname) - - def tearDown(self): - os.remove(self.fname) - - def test_file(self): - gen = numpy.random.rand(1) - f = file(self.fname, 'w'); - write(f, gen) - f.flush() - f = file(self.fname, 'r'); - mat = read(f, None, debug=False) #load from filename - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def test_filename(self): - gen = numpy.random.rand(1) - write(self.fname, gen) - mat = read(self.fname, None, debug=False) #load from filename - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def testNd(self): - """shape and values are stored correctly for tensors of rank 0 to 5""" - whole_shape = [5, 6, 7, 8, 9] - for i in xrange(5): - gen = numpy.asarray(numpy.random.rand(*whole_shape[:i])) - f = file(self.fname, 'w'); - write(f, gen) - f.flush() - f = file(self.fname, 'r'); - mat = read(f, None, debug=False) #load from filename - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def test_dtypes(self): - """shape and values are stored correctly for all dtypes """ - for dtype in filetensor._dtype_magic: - gen = numpy.asarray( - numpy.random.rand(4, 5, 2, 1) * 100, - dtype=dtype) - f = file(self.fname, 'w'); - write(f, gen) - f.flush() - f = file(self.fname, 'r'); - mat = read(f, None, debug=False) #load from filename - self.failUnless(gen.dtype == mat.dtype) - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def test_dtype_invalid(self): - gen = numpy.zeros((3,4), dtype='uint16') #an unsupported dtype - f = file(self.fname, 'w') - passed = False - try: - write(f, gen) - except TypeError, e: - if e[0].startswith('Invalid ndarray dtype'): - passed = True - f.close() - self.failUnless(passed) - - -if __name__ == '__main__': - unittest.main() - - #a small test script, starts by reading sys.argv[1] - #print 'rval', rval.shape, rval.size - - if 0: - write(f, rval) - print '' - f.close() - f = file('/tmp/some_mat', 'r'); - rval2 = read(f) #load from file handle - print 'rval2', rval2.shape, rval2.size - - assert rval.dtype == rval2.dtype - assert rval.shape == rval2.shape - assert numpy.all(rval == rval2) - print 'ok' - - def _unused(): - f.seek(0,2) #seek to end - f_len = f.tell() - f.seek(f_data_start,0) #seek back to where we were - - if debug: print 'length:', f_len - - - f_data_bytes = (f_len - f_data_start) - - if debug: print 'data bytes according to header: ', dim_size * elsize - if debug: print 'data bytes according to file : ', f_data_bytes - - if debug: print 'reading data...' - sys.stdout.flush() - - def read_ndarray(f, dim, dtype): - return numpy.fromfile(f, dtype=dtype, count=_prod(dim)).reshape(dim) - diff -r 4e6b550fe131 -r 174374d59405 test_lookup_list.py --- a/test_lookup_list.py Thu Jun 05 18:43:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -from lookup_list import * -def have_raised(to_eval, **var): - have_thrown = False - try: - eval(to_eval) - except : - have_thrown = True - return have_thrown - -def have_raised2(f, *args, **kwargs): - have_thrown = False - try: - f(*args, **kwargs) - except : - have_thrown = True - return have_thrown - - -def test_LookupList(): - #test only the example in the doc??? 
- print "test_LookupList" - example = LookupList(['x','y','z'],[1,2,3]) - example['x'] = [1, 2, 3] # set or change a field - x, y, z = example - x = example[0] - x = example["x"] - assert example.keys()==['x','y','z'] - assert example.values()==[[1,2,3],2,3] - assert example.items()==[('x',[1,2,3]),('y',2),('z',3)] - example.append_keyval('u',0) # adds item with name 'u' and value 0 - assert len(example)==4 # number of items = 4 here - example2 = LookupList(['v','w'], ['a','b']) - example3 = LookupList(['x','y','z','u','v','w'], [[1, 2, 3],2,3,0,'a','b']) - assert example+example2==example3 - assert have_raised("var['x']+var['x']",x=example) - - del example, example2, example3, x, y ,z - -if __name__=='__main__': - test_LookupList() diff -r 4e6b550fe131 -r 174374d59405 test_mlp.py --- a/test_mlp.py Thu Jun 05 18:43:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,132 +0,0 @@ - -from mlp import * -import dataset -import nnet_ops - - -from functools import partial -def separator(debugger, i, node, *ths): - print "===================" - -def what(debugger, i, node, *ths): - print "#%i" % i, node - -def parents(debugger, i, node, *ths): - print [input.step for input in node.inputs] - -def input_shapes(debugger, i, node, *ths): - print "input shapes: ", - for r in node.inputs: - if hasattr(r.value, 'shape'): - print r.value.shape, - else: - print "no_shape", - print - -def input_types(debugger, i, node, *ths): - print "input types: ", - for r in node.inputs: - print r.type, - print - -def output_shapes(debugger, i, node, *ths): - print "output shapes:", - for r in node.outputs: - if hasattr(r.value, 'shape'): - print r.value.shape, - else: - print "no_shape", - print - -def output_types(debugger, i, node, *ths): - print "output types:", - for r in node.outputs: - print r.type, - print - - -def test0(): - linker = 'c|py' - #linker = partial(theano.gof.DebugLinker, linkers = [theano.gof.OpWiseCLinker], - # debug_pre = [separator, what, parents, input_types, input_shapes], - # debug_post = [output_shapes, output_types], - # compare_fn = lambda x, y: numpy.all(x == y)) - - nnet = OneHiddenLayerNNetClassifier(10,2,.001,1000, linker = linker) - training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - fprop=nnet(training_set) - - output_ds = fprop(training_set) - - for fieldname in output_ds.fieldNames(): - print fieldname+"=",output_ds[fieldname] - -def test1(): - nnet = ManualNNet(2, 10,3,.1,1000) - training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - fprop=nnet(training_set) - - output_ds = fprop(training_set) - - for fieldname in output_ds.fieldNames(): - print fieldname+"=",output_ds[fieldname] - -def test2(): - training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - nin, nhid=2, 10 - def sigm_layer(input): - W1 = t.matrix('W1') - b1 = t.vector('b1') - return (nnet_ops.sigmoid(b1 + t.dot(input, W1)), - [W1, b1], - [(numpy.random.rand(nin, nhid) -0.5) * 0.001, numpy.zeros(nhid)]) - nnet = ManualNNet(nin, nhid, 3, .1, 1000, hidden_layer=sigm_layer) - fprop=nnet(training_set) - - output_ds = fprop(training_set) - - for fieldname in output_ds.fieldNames(): - print fieldname+"=",output_ds[fieldname] - -def test_interface_0(): - learner = ManualNNet(2, 10, 3, .1, 1000) - - model = learner(training_set) - - model2 = learner(training_set) # trains 
model a second time - - learner.update(additional_data) # modifies nnet and model by side-effect - - -def test_interface2_1(): - learn_algo = ManualNNet(2, 10, 3, .1, 1000) - - prior = learn_algo() - - model1 = learn_algo(training_set1) - - model2 = learn_algo(training_set2) - - model2.update(additional_data) - - n_match = 0 - for o1, o2 in zip(model1.use(test_data), model2.use(test_data)): - n_match += (o1 == o2) - - print n_match - -test1() -test2() - diff -r 4e6b550fe131 -r 174374d59405 test_speed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_speed.py Fri Jun 06 15:56:18 2008 -0400 @@ -0,0 +1,79 @@ +import numpy +from dataset import * +from misc import * +def test_speed(array, ds): + print "test_speed", ds.__class__ + + mat = numpy.random.rand(400,100) + + @print_timing + def f_array_full(a): + a+1 + @print_timing + def f_array_index(a): + for id in range(a.shape[0]): +# pass + a[id]+1 +# a[id]*mat + @print_timing + def f_array_iter(a): + for r in a: +# pass + r+1 +# r*mat + @print_timing + def f_ds_index(ds): + for id in range(len(ds)): +# pass + ds[id][0]+1 +# ds[id][0]*mat + @print_timing + def f_ds_iter(ds): + for ex in ds: +# pass + ex[0]+1 +# a[0]*mat + @print_timing + def f_ds_mb1(ds,mb_size): + for exs in ds.minibatches(minibatch_size = mb_size): + for ex in exs: +# pass + ex[0]+1 +# ex[0]*mat + @print_timing + def f_ds_mb2(ds,mb_size): + for exs in ds.minibatches(minibatch_size = mb_size): +# pass + exs[0]+1 +# ex[0]*mat + + f_array_full(array) + f_array_index(array) + f_array_iter(array) + + f_ds_index(ds) + f_ds_iter(ds) + + f_ds_mb1(ds,10) + f_ds_mb1(ds,100) + f_ds_mb1(ds,1000) + f_ds_mb1(ds,10000) + f_ds_mb2(ds,10) + f_ds_mb2(ds,100) + f_ds_mb2(ds,1000) + f_ds_mb2(ds,10000) + +if __name__=='__main__': + a2 = numpy.random.rand(100000,400) + ds1 = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)}) + test_speed(a2,ds1) + a1 = numpy.random.rand(100000,40) + ds4 = ArrayDataSet(a1,LookupList(["f"+str(x)for x in range(a1.shape[1])], + range(a1.shape[1]))) + test_speed(a2,ds4) + ds2=CachedDataSet(ds1,cache_all_upon_construction=False) + test_speed(a2,ds2) + ds3=CachedDataSet(ds1,cache_all_upon_construction=True) + test_speed(a2,ds3) + del a2,ds1,ds2,ds3 +
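
For reference, the rewritten _test_dataset.py exercises ArrayDataSet through the field specification {'x': slice(3), 'y': 3, 'z': [0, 2]} applied to a (10, 4) array. The short sketch below is an illustration rather than part of the changeset: it assumes only numpy (ArrayDataSet itself lives in the dataset module touched above) and shows how that specification maps array columns to the fields asserted throughout the tests, with 'x' covering columns 0-2, 'y' the scalar column 3, and 'z' columns 0 and 2.

    import numpy

    a = numpy.random.rand(10, 4)
    fields = {'x': slice(3), 'y': 3, 'z': [0, 2]}

    for row in a:
        x, y, z = row[fields['x']], row[fields['y']], row[fields['z']]
        assert (x == row[:3]).all()      # 'x' spans the first three columns
        assert y == row[3]               # 'y' is the scalar fourth column
        assert (z == row[0:3:2]).all()   # 'z' picks out columns 0 and 2

The new autotest.py runner collects these cases automatically: it scans the working directory for modules named _test*.py, imports each one, loads whatever unittest cases it defines, and runs the aggregated suite with TextTestRunner (or with suite.debug() when --debug is passed).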