# HG changeset patch
# User Joseph Turian
# Date 1238460484 14400
# Node ID 8fff4bc26f4c1204dca826872634248c2a21fd66
# Parent  27b1344a57b17ede4e62f5b47ac3346e5ea1f231
# Parent  9e62fd6b6677586a4a75f53f722196072fa891fa
merge

diff -r 27b1344a57b1 -r 8fff4bc26f4c .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Mon Mar 30 20:48:04 2009 -0400
@@ -0,0 +1,1 @@
+5f9ffefa9ca8040e18e1a69fdbbe34b8a19099bc sequencelabelling 20090130-rerun
diff -r 27b1344a57b1 -r 8fff4bc26f4c LICENSE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Mon Mar 30 20:48:04 2009 -0400
@@ -0,0 +1,24 @@
+Copyright (c) 2008, Theano Development Team
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Theano nor the names of its contributors may be
+      used to endorse or promote products derived from this software without
+      specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff -r 27b1344a57b1 -r 8fff4bc26f4c README.txt diff -r 27b1344a57b1 -r 8fff4bc26f4c __init__.py --- a/__init__.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -import filetensor -import version -import learner - -from lookup_list import LookupList - -def __src_version__(): - #todo - this is vulnerable to the bug in theano ticket #160 - return version.src_version(__name__) - diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_dataset.py --- a/_test_dataset.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,680 +0,0 @@ -#!/bin/env python -from dataset import * -from math import * -import numpy, unittest, sys -#from misc import * -from lookup_list import LookupList - -def have_raised(to_eval, **var): - have_thrown = False - try: - eval(to_eval) - except : - have_thrown = True - return have_thrown - -def have_raised2(f, *args, **kwargs): - have_thrown = False - try: - f(*args, **kwargs) - except : - have_thrown = True - return have_thrown - -def test1(): - print "test1" - global a,ds - a = numpy.random.rand(10,4) - print a - ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]}) - print "len(ds)=",len(ds) - assert(len(ds)==10) - print "example 0 = ",ds[0] -# assert - print "x=",ds["x"] - print "x|y" - for x,y in ds("x","y"): - print x,y - minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4) - minibatch = minibatch_iterator.__iter__().next() - print "minibatch=",minibatch - for var in minibatch: - print "var=",var - print "take a slice and look at field y",ds[1:6:2]["y"] - - del a,ds,x,y,minibatch_iterator,minibatch,var - -def test_iterate_over_examples(array,ds): -#not in doc!!! - i=0 - for example in range(len(ds)): - wanted = array[example][:3] - returned = ds[example]['x'] - if (wanted != returned).all(): - print 'returned:', returned - print 'wanted:', wanted - assert (ds[example]['x']==array[example][:3]).all() - assert ds[example]['y']==array[example][3] - assert (ds[example]['z']==array[example][[0,2]]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for example in dataset: - i=0 - for example in ds: - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,... 
in dataset: - i=0 - for x,y,z in ds: - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - -# - for example in dataset(field1, field2,field3, ...): - i=0 - for example in ds('x','y','z'): - assert len(example)==3 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (example['z']==array[i][0:3:2]).all() - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - i=0 - for example in ds('y','x'): - assert len(example)==2 - assert (example['x']==array[i][:3]).all() - assert example['y']==array[i][3] - assert (numpy.append(example['x'],example['y'])==array[i]).all() - i+=1 - assert i==len(ds) - del example,i - -# - for val1,val2,val3 in dataset(field1, field2,field3): - i=0 - for x,y,z in ds('x','y','z'): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (z==array[i][0:3:2]).all() - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,z,i - i=0 - for y,x in ds('y','x',): - assert (x==array[i][:3]).all() - assert y==array[i][3] - assert (numpy.append(x,y)==array[i]).all() - i+=1 - assert i==len(ds) - del x,y,i - - def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): - ##full minibatch or the last minibatch - for idx in range(nb_field): - test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) - del idx - def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): - assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)2: - ds[:1] - ds[1:1] - ds[1:1:1] - if len(ds)>5: - ds[[1,2,3]] - for x in ds: - pass - -#ds[:n] returns a LookupList with the n first examples. - ds2=ds[:3] - test_ds(ds,ds2,index=[0,1,2]) - del ds2 - -#ds[i:j] returns a LookupList with examples i,i+1,...,j-1. - ds2=ds[1:3] - test_ds(ds,ds2,index=[1,2]) - del ds2 - -#ds[i1:i2:s] returns a LookupList with the examples i1,i1+s,...i2-s. - ds2=ds[1:7:2] - test_ds(ds,ds2,[1,3,5]) - del ds2 - -#ds[i] returns the (i+1)-th example of the dataset. - ds2=ds[5] - assert isinstance(ds2,Example) - test_ds(ds,ds2,[5]) - assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined - assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) - del ds2 - -#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. - ds2=ds[[4,7,2,8]] -# assert isinstance(ds2,DataSet) - test_ds(ds,ds2,[4,7,2,8]) - del ds2 - - #ds.# returns the value of a property associated with - #the name . The following properties should be supported: - # - 'description': a textual description or name for the ds - # - 'fieldtypes': a list of types (one per field) - - #* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? - #assert hstack([ds('x','y'),ds('z')])==ds - #hstack([ds('z','y'),ds('x')])==ds - assert have_raised2(hstack,[ds('x'),ds('x')]) - assert have_raised2(hstack,[ds('y','x'),ds('x')]) - assert not have_raised2(hstack,[ds('x'),ds('y')]) - - # i=0 - # for example in hstack([ds('x'),ds('y'),ds('z')]): - # example==ds[i] - # i+=1 - # del i,example - #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? 
- -def test_subset(array,ds): - def test_ds(orig,ds,index): - i=0 - assert isinstance(ds2,DataSet) - assert len(ds)==len(index) - for x,z,y in ds('x','z','y'): - assert (orig[index[i]]['x']==array[index[i]][:3]).all() - assert (orig[index[i]]['x']==x).all() - assert orig[index[i]]['y']==array[index[i]][3] - assert orig[index[i]]['y']==y - assert (orig[index[i]]['z']==array[index[i]][0:3:2]).all() - assert (orig[index[i]]['z']==z).all() - i+=1 - del i - ds[0] - if len(ds)>2: - ds[:1] - ds[1:1] - ds[1:1:1] - if len(ds)>5: - ds[[1,2,3]] - for x in ds: - pass - -#ds[:n] returns a dataset with the n first examples. - ds2=ds.subset[:3] - test_ds(ds,ds2,index=[0,1,2]) -# del ds2 - -#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. - ds2=ds.subset[1:7:2] - test_ds(ds,ds2,[1,3,5]) -# del ds2 - -# #ds[i] -# ds2=ds.subset[5] -# assert isinstance(ds2,Example) -# assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined -# assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) -# del ds2 - -#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. - ds2=ds.subset[[4,7,2,8]] - test_ds(ds,ds2,[4,7,2,8]) -# del ds2 - -#ds.# returns the value of a property associated with - #the name . The following properties should be supported: - # - 'description': a textual description or name for the ds - # - 'fieldtypes': a list of types (one per field) - -#* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? - #assert hstack([ds('x','y'),ds('z')])==ds - #hstack([ds('z','y'),ds('x')])==ds - assert have_raised2(hstack,[ds('x'),ds('x')]) - assert have_raised2(hstack,[ds('y','x'),ds('x')]) - assert not have_raised2(hstack,[ds('x'),ds('y')]) - -# i=0 -# for example in hstack([ds('x'),ds('y'),ds('z')]): -# example==ds[i] -# i+=1 -# del i,example -#* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? - -def test_fields_fct(ds): - #@todo, fill correctly - assert len(ds.fields())==3 - i=0 - v=0 - for field in ds.fields(): - for field_value in field: # iterate over the values associated to that field for all the ds examples - v+=1 - i+=1 - assert i==3 - assert v==3*10 - del i,v - - i=0 - v=0 - for field in ds('x','z').fields(): - i+=1 - for val in field: - v+=1 - assert i==2 - assert v==2*10 - del i,v - - i=0 - v=0 - for field in ds.fields('x','y'): - i+=1 - for val in field: - v+=1 - assert i==2 - assert v==2*10 - del i,v - - i=0 - v=0 - for field_examples in ds.fields(): - for example_value in field_examples: - v+=1 - i+=1 - assert i==3 - assert v==3*10 - del i,v - - assert ds == ds.fields().examples() - assert len(ds('x','y').fields()) == 2 - assert len(ds('x','z').fields()) == 2 - assert len(ds('y').fields()) == 1 - - del field - -def test_overrides(ds) : - """ Test for examples that an override __getitem__ acts as the one in DataSet """ - def ndarray_list_equal(nda,l) : - """ - Compares if a ndarray is the same as the list. 
Do it by converting the list into - an numpy.ndarray, if possible - """ - try : - l = numpy.asmatrix(l) - except : - return False - return smart_equal(nda,l) - - def smart_equal(a1,a2) : - """ - Handles numpy.ndarray, LookupList, and basic containers - """ - if not isinstance(a1,type(a2)) and not isinstance(a2,type(a1)): - #special case: matrix vs list of arrays - if isinstance(a1,numpy.ndarray) : - return ndarray_list_equal(a1,a2) - elif isinstance(a2,numpy.ndarray) : - return ndarray_list_equal(a2,a1) - return False - # compares 2 numpy.ndarray - if isinstance(a1,numpy.ndarray): - if len(a1.shape) != len(a2.shape): - return False - for k in range(len(a1.shape)) : - if a1.shape[k] != a2.shape[k]: - return False - return (a1==a2).all() - # compares 2 lookuplists - if isinstance(a1,LookupList) : - if len(a1._names) != len(a2._names) : - return False - for k in a1._names : - if k not in a2._names : - return False - if not smart_equal(a1[k],a2[k]) : - return False - return True - # compares 2 basic containers - if hasattr(a1,'__len__'): - if len(a1) != len(a2) : - return False - for k in range(len(a1)) : - if not smart_equal(a1[k],a2[k]): - return False - return True - # try basic equals - return a1 is a2 - - def mask(ds) : - class TestOverride(type(ds)): - def __init__(self,ds) : - self.ds = ds - def __getitem__(self,key) : - res1 = self.ds[key] - res2 = DataSet.__getitem__(ds,key) - assert smart_equal(res1,res2) - return res1 - return TestOverride(ds) - # test getitem - ds2 = mask(ds) - for k in range(10): - res = ds2[k] - res = ds2[1:len(ds):3] - - - - - - -def test_all(array,ds): - assert len(ds)==10 - test_iterate_over_examples(array, ds) - test_overrides(ds) - test_getitem(array, ds) - test_subset(array, ds) - test_ds_iterator(array,ds('x','y'),ds('y','z'),ds('x','y','z')) - test_fields_fct(ds) - - -class T_DataSet(unittest.TestCase): - def test_ArrayDataSet(self): - #don't test stream - #tested only with float value - #don't always test with y - #don't test missing value - #don't test with tuple - #don't test proterties - a2 = numpy.random.rand(10,4) - ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested - ds = ArrayDataSet(a2,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested - #assert ds==a? should this work? 
- - test_all(a2,ds) - - del a2, ds - - def test_CachedDataSet(self): - a = numpy.random.rand(10,4) - ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested - ds2 = CachedDataSet(ds1) - ds3 = CachedDataSet(ds1,cache_all_upon_construction=True) - - test_all(a,ds2) - test_all(a,ds3) - - del a,ds1,ds2,ds3 - - - def test_DataSetFields(self): - raise NotImplementedError() - - def test_ApplyFunctionDataSet(self): - a = numpy.random.rand(10,4) - a2 = a+1 - ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested - - ds2 = ApplyFunctionDataSet(ds1,lambda x,y,z: (x+1,y+1,z+1), ['x','y','z'],minibatch_mode=False) - ds3 = ApplyFunctionDataSet(ds1,lambda x,y,z: (numpy.array(x)+1,numpy.array(y)+1,numpy.array(z)+1), - ['x','y','z'], - minibatch_mode=True) - - test_all(a2,ds2) - test_all(a2,ds3) - - del a,ds1,ds2,ds3 - - def test_FieldsSubsetDataSet(self): - a = numpy.random.rand(10,4) - ds = ArrayDataSet(a,Example(['x','y','z','w'],[slice(3),3,[0,2],0])) - ds = FieldsSubsetDataSet(ds,['x','y','z']) - - test_all(a,ds) - - del a, ds - - def test_RenamedFieldsDataSet(self): - a = numpy.random.rand(10,4) - ds = ArrayDataSet(a,Example(['x1','y1','z1','w1'],[slice(3),3,[0,2],0])) - ds = RenamedFieldsDataSet(ds,['x1','y1','z1'],['x','y','z']) - - test_all(a,ds) - - del a, ds - - def test_MinibatchDataSet(self): - raise NotImplementedError() - def test_HStackedDataSet(self): - raise NotImplementedError() - def test_VStackedDataSet(self): - raise NotImplementedError() - def test_ArrayFieldsDataSet(self): - raise NotImplementedError() - - -class T_Exotic1(unittest.TestCase): - class DataSet(DataSet): - """ Dummy dataset, where one field is a ndarray of variables size. """ - def __len__(self) : - return 100 - def fieldNames(self) : - return 'input','target','name' - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - class MultiLengthDataSetIterator(object): - def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): - if fieldnames is None: fieldnames = dataset.fieldNames() - self.minibatch = Example(fieldnames,range(len(fieldnames))) - self.dataset, self.minibatch_size, self.current = dataset, minibatch_size, offset - def __iter__(self): - return self - def next(self): - for k in self.minibatch._names : - self.minibatch[k] = [] - for ex in range(self.minibatch_size) : - if 'input' in self.minibatch._names: - self.minibatch['input'].append( numpy.array( range(self.current + 1) ) ) - if 'target' in self.minibatch._names: - self.minibatch['target'].append( self.current % 2 ) - if 'name' in self.minibatch._names: - self.minibatch['name'].append( str(self.current) ) - self.current += 1 - return self.minibatch - return MultiLengthDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) - - def test_ApplyFunctionDataSet(self): - ds = T_Exotic1.DataSet() - dsa = ApplyFunctionDataSet(ds,lambda x,y,z: (x[-1],y*10,int(z)),['input','target','name'],minibatch_mode=False) #broken!!!!!! 
- for k in range(len(dsa)): - res = dsa[k] - self.failUnless(ds[k]('input')[0][-1] == res('input')[0] , 'problem in first applied function') - res = dsa[33:96:3] - - def test_CachedDataSet(self): - ds = T_Exotic1.DataSet() - dsc = CachedDataSet(ds) - for k in range(len(dsc)) : - self.failUnless(numpy.all( dsc[k]('input')[0] == ds[k]('input')[0] ) , (dsc[k],ds[k]) ) - res = dsc[:] - -if __name__=='__main__': - tests = [] - debug=False - if len(sys.argv)==1: - unittest.main() - else: - assert sys.argv[1]=="--debug" - for arg in sys.argv[2:]: - tests.append(arg) - if tests: - unittest.TestSuite(map(T_DataSet, tests)).debug() - else: - module = __import__("_test_dataset") - tests = unittest.TestLoader().loadTestsFromModule(module) - tests.debug() diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_filetensor.py --- a/_test_filetensor.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -from filetensor import * -import filetensor - -import unittest -import os - -class T(unittest.TestCase): - fname = '/tmp/some_mat' - - def setUp(self): - #TODO: test that /tmp/some_mat does not exist - try: - os.stat(self.fname) - except OSError: - return #assume file was not found - raise Exception('autotest file "%s" exists!' % self.fname) - - def tearDown(self): - os.remove(self.fname) - - def test_file(self): - gen = numpy.random.rand(1) - f = file(self.fname, 'w'); - write(f, gen) - f.flush() - f = file(self.fname, 'r'); - mat = read(f, None, debug=False) #load from filename - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def test_filename(self): - gen = numpy.random.rand(1) - f = file(self.fname, 'w') - write(f, gen) - f.close() - f = file(self.fname, 'r') - mat = read(f, None, debug=False) #load from filename - f.close() - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def testNd(self): - """shape and values are stored correctly for tensors of rank 0 to 5""" - whole_shape = [5, 6, 7, 8, 9] - for i in xrange(5): - gen = numpy.asarray(numpy.random.rand(*whole_shape[:i])) - f = file(self.fname, 'w'); - write(f, gen) - f.flush() - f = file(self.fname, 'r'); - mat = read(f, None, debug=False) #load from filename - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def test_dtypes(self): - """shape and values are stored correctly for all dtypes """ - for dtype in filetensor._dtype_magic: - gen = numpy.asarray( - numpy.random.rand(4, 5, 2, 1) * 100, - dtype=dtype) - f = file(self.fname, 'w'); - write(f, gen) - f.flush() - f = file(self.fname, 'r'); - mat = read(f, None, debug=False) #load from filename - self.failUnless(gen.dtype == mat.dtype) - self.failUnless(gen.shape == mat.shape) - self.failUnless(numpy.all(gen == mat)) - - def test_dtype_invalid(self): - gen = numpy.zeros((3,4), dtype='uint16') #an unsupported dtype - f = file(self.fname, 'w') - passed = False - try: - write(f, gen) - except TypeError, e: - if e[0].startswith('Invalid ndarray dtype'): - passed = True - f.close() - self.failUnless(passed) - - -if __name__ == '__main__': - unittest.main() - - #a small test script, starts by reading sys.argv[1] - #print 'rval', rval.shape, rval.size - - if 0: - write(f, rval) - print '' - f.close() - f = file('/tmp/some_mat', 'r'); - rval2 = read(f) #load from file handle - print 'rval2', rval2.shape, rval2.size - - assert rval.dtype == rval2.dtype - assert rval.shape == rval2.shape - assert numpy.all(rval == rval2) - print 'ok' - - def _unused(): - f.seek(0,2) 
#seek to end - f_len = f.tell() - f.seek(f_data_start,0) #seek back to where we were - - if debug: print 'length:', f_len - - - f_data_bytes = (f_len - f_data_start) - - if debug: print 'data bytes according to header: ', dim_size * elsize - if debug: print 'data bytes according to file : ', f_data_bytes - - if debug: print 'reading data...' - sys.stdout.flush() - - def read_ndarray(f, dim, dtype): - return numpy.fromfile(f, dtype=dtype, count=_prod(dim)).reshape(dim) - diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_linear_regression.py --- a/_test_linear_regression.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ - -import unittest -from linear_regression import * -from make_test_datasets import * -import numpy - -class test_linear_regression(unittest.TestCase): - - def test1(self): - trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3, - n_targets=2, - n_examples=100, - f=linear_predictor) - - assert trainset.fields()['input'].shape==(50,3) - assert testset.fields()['target'].shape==(50,2) - regressor = LinearRegression(L2_regularizer=0.1) - predictor = regressor(trainset) - test_data = testset.fields() - mse = predictor.compute_mse(test_data['input'],test_data['target']) - print 'mse = ',mse - -if __name__ == '__main__': - import sys - - if len(sys.argv)==1: - unittest.main() - else: - assert sys.argv[1]=="--debug" - tests = [] - for arg in sys.argv[2:]: - tests.append(arg) - if tests: - unittest.TestSuite(map(T_DataSet, tests)).debug() - else: - module = __import__("_test_linear_regression") - tests = unittest.TestLoader().loadTestsFromModule(module) - tests.debug() diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_lookup_list.py --- a/_test_lookup_list.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -from lookup_list import * -import unittest - -class T_LookUpList(unittest.TestCase): - def test_LookupList(self): - #test only the example in the doc??? 
- example = LookupList(['x','y','z'],[1,2,3]) - example['x'] = [1, 2, 3] # set or change a field - x, y, z = example - x = example[0] - x = example["x"] - assert example.keys()==['x','y','z'] - assert example.values()==[[1,2,3],2,3] - assert example.items()==[('x',[1,2,3]),('y',2),('z',3)] - example.append_keyval('u',0) # adds item with name 'u' and value 0 - assert len(example)==4 # number of items = 4 here - example2 = LookupList(['v','w'], ['a','b']) - example3 = LookupList(['x','y','z','u','v','w'], [[1, 2, 3],2,3,0,'a','b']) - assert example+example2==example3 - self.assertRaises(AssertionError,example.__add__,example) - del example, example2, example3, x, y ,z - -if __name__=='__main__': - unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_nnet_ops.py --- a/_test_nnet_ops.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,132 +0,0 @@ - -import unittest -import theano -import theano._test_tensor as TT -import numpy - -from nnet_ops import * - -class T_sigmoid(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test_elemwise(self): - TT.verify_grad(self, sigmoid, [numpy.random.rand(3,4)]) - -class T_softplus(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test_elemwise(self): - TT.verify_grad(self, softplus, [numpy.random.rand(3,4)]) - -class T_Softmax(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test0(self): - class Dummy(object): - def make_node(self, a): - return [softmax(a)[:,0]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - def test1(self): - class Dummy(object): - def make_node(self, a): - return [softmax(a)[:,1]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - def test2(self): - class Dummy(object): - def make_node(self, a): - return [softmax(a)[:,2]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - def test3(self): - class Dummy(object): - def make_node(self, a): - return [softmax(a)[:,3]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - - -class T_SoftmaxWithBias(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test0(self): - class Dummy(object): - def make_node(self, a, b): - return [softmax_with_bias(a, b)[:,0]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), - numpy.random.rand(4)]) - def test1(self): - class Dummy(object): - def make_node(self, a, b): - return [softmax_with_bias(a, b)[:,1]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), - numpy.random.rand(4)]) - def test2(self): - class Dummy(object): - def make_node(self, a, b): - return [softmax_with_bias(a, b)[:,2]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), - numpy.random.rand(4)]) - def test3(self): - class Dummy(object): - def make_node(self, a, b): - return [softmax_with_bias(a, b)[:,3]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), - numpy.random.rand(4)]) - -class T_CrossentropySoftmax1Hot(unittest.TestCase): - def setUp(self): - numpy.random.seed(9999) - def test0(self): - y_idx = [0,1,3] - class Dummy(object): - def make_node(self, a,b): - return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4), - numpy.random.rand(4)]) - - def test1(self): - y_idx = [0,1,3] - class Dummy(object): - def make_node(self, a): - return crossentropy_softmax_1hot(a, y_idx)[0:1] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - -class T_prepend(unittest.TestCase): - def test0(self): - """basic functionality""" - x=tensor.matrix('x') - 
y=Prepend_scalar_constant_to_each_row(4.)(x) - f=theano.function([x],[y]) - m=numpy.random.rand(3,5) - my = f(m) - self.failUnless(my.shape == (3, 6), my.shape) - self.failUnless(numpy.all( my[:,0] == 4.0)) - - -class T_prepend(unittest.TestCase): - def test0(self): - """basic functionality""" - x=tensor.matrix('x') - y=Prepend_scalar_to_each_row()(5.,x) - f=theano.function([x],[y]) - m=numpy.ones((3,5),dtype="float32") - my = f(m) - self.failUnless(str(my.dtype) == 'float64') - self.failUnless(my.shape == (3, 6)) - self.failUnless(numpy.all(my[:,0] == 5.0)) - -class T_solve(unittest.TestCase): - def setUp(self): - self.rng = numpy.random.RandomState(666) - - def test0(self): - A=self.rng.randn(5,5) - b=numpy.array(range(5),dtype=float) - x=numpy.linalg.solve(A,b) - Ax = numpy.dot(A,x) - are = theano.gradient.numeric_grad.abs_rel_err(Ax, b) - self.failUnless(numpy.all(are < 1.0e-5), (are, Ax, b)) - #print A,b - #print numpy.dot(A,x) - - -if __name__ == '__main__': - unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_onehotop.py --- a/_test_onehotop.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -from onehotop import one_hot - -import unittest -from theano import compile -from theano import gradient -from theano import function -from theano.tensor import as_tensor - -import random -import numpy.random - -class T_OneHot(unittest.TestCase): - def test0(self): - x = as_tensor([3, 2, 1]) - y = as_tensor(5) - o = one_hot(x, y) - f = function([],o) - self.failUnless(numpy.all(f() == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]]))) - -if __name__ == '__main__': - unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_random_transformation.py --- a/_test_random_transformation.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -from random_transformation import row_random_transformation - -import unittest -from theano import compile -from theano import gradient - -from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result -from theano.sparse import _mtypes, _mtype_to_str -from theano.sparse import as_sparse - -from theano.tensor import as_tensor -from theano.scalar import as_scalar - -import random -import numpy.random - -class T_RowRandomTransformation(unittest.TestCase): - def setUp(self): - random.seed(44) - numpy.random.seed(44) - - def test_basic(self): - rows = 4 - cols = 20 - fakeseed = 0 - length = 3 - md = numpy.random.rand(rows, cols) - for mtype in _mtypes: - m = as_sparse(mtype(md)) - o = row_random_transformation(m, length, initial_seed=fakeseed) - y = compile.eval_outputs([o]) - expected = "[[ 0.88239119 1.03244463 -1.29297503]\n [ 0.02644961 1.50119695 -0.025081 ]\n [-0.60741013 1.25424625 0.30119422]\n [-1.08659967 -0.35531544 -1.38915467]]" - self.failUnless(str(y) == expected) - - def test_length(self): - """ Test that if length is increased, we obtain the same results - (except longer). 
""" - - for i in range(10): - mtype = random.choice(_mtypes) - rows = random.randint(1, 20) - cols = random.randint(1, 20) - fakeseed = random.randint(0, 100) - length = random.randint(1, 10) - extralength = random.randint(1, 10) - - m = as_sparse(mtype(numpy.random.rand(rows, cols))) - o1 = row_random_transformation(m, length, initial_seed=fakeseed) - o2 = row_random_transformation(m, length + extralength, initial_seed=fakeseed) - - y1 = compile.eval_outputs([o1]) - y2 = compile.eval_outputs([o2]) - - self.failUnless((y1 == y2[:,:length]).all()) - - def test_permute(self): - """ Test that if the order of the rows is permuted, we obtain the same results. """ - for i in range(10): - mtype = random.choice(_mtypes) - rows = random.randint(2, 20) - cols = random.randint(1, 20) - fakeseed = random.randint(0, 100) - length = random.randint(1, 10) - - permute = numpy.random.permutation(rows) - - - m1 = numpy.random.rand(rows, cols) - m2 = m1[permute] - for r in range(rows): - self.failUnless((m2[r] == m1[permute[r]]).all()) - s1 = as_sparse(mtype(m1)) - s2 = as_sparse(mtype(m2)) - o1 = row_random_transformation(s1, length, initial_seed=fakeseed) - o2 = row_random_transformation(s2, length, initial_seed=fakeseed) - y1 = compile.eval_outputs([o1]) - y2 = compile.eval_outputs([o2]) - - self.failUnless(y1.shape == y2.shape) - for r in range(rows): - self.failUnless((y2[r] == y1[permute[r]]).all()) - -if __name__ == '__main__': - unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c _test_xlogx.py --- a/_test_xlogx.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -from xlogx import xlogx - -import unittest - -from theano import compile -from theano import gradient -from theano import function -from theano.tensor import as_tensor -import theano.tensor.basic as TT - -import random -import numpy.random - -class T_XlogX(unittest.TestCase): - def test0(self): - x = as_tensor([1, 0]) - y = xlogx(x) - f = function([],y) - self.failUnless(numpy.all(f() == numpy.asarray([0, 0.]))) - def test1(self): - class Dummy(object): - def make_node(self, a): - return [xlogx(a)[:,2]] - TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)]) - - -if __name__ == '__main__': - unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c activation.py --- a/activation.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -""" -Activation functions. - -@todo: Make an Activation function class, with a particular contract. -That way, we can swap in Activation functions in our algorithms. 
-""" diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/__init__.py --- a/algorithms/__init__.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ - -from regressor import Regressor, BinRegressor -from aa import AutoEncoder, SigmoidXEAutoEncoder -from daa import DenoisingAA, SigmoidXEDenoisingAA -from stacker import Stacker diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/_test_logistic_regression.py --- a/algorithms/_test_logistic_regression.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ -from logistic_regression import * -import sys, time - -if __name__ == '__main__': - pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx')) - pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax')) - if 1: - lrc = Module_Nclass() - - print '================' - print lrc.update.pretty() - print '================' - print lrc.update.pretty(mode = theano.Mode('py', 'fast_run')) - print '================' -# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace')) -# print '================' - -# sys.exit(0) - - lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run')) - #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run')) - #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN') - - data_x = N.random.randn(5, 10) - data_y = (N.random.randn(5) > 0) - - t = time.time() - for i in xrange(10000): - lr.lr = 0.02 - xe = lr.update(data_x, data_y) - #if i % 100 == 0: - # print i, xe - - print 'training time:', time.time() - t - print 'final error', xe - - #print - #print 'TRAINED MODEL:' - #print lr - - if 0: - lrc = Module() - - lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN') - - data_x = N.random.randn(5, 10) - data_y = (N.random.randn(5, 1) > 0) - - for i in xrange(10000): - xe = lr.update(data_x, data_y) - if i % 100 == 0: - print i, xe - - print - print 'TRAINED MODEL:' - print lr - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/aa.py --- a/algorithms/aa.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ - -import theano -from theano import tensor as T -from theano.tensor import nnet as NN -import numpy as N - -class AutoEncoder(theano.FancyModule): - - def __init__(self, input = None, regularize = True, tie_weights = True): - super(AutoEncoder, self).__init__() - - # MODEL CONFIGURATION - self.regularize = regularize - self.tie_weights = tie_weights - - # ACQUIRE/MAKE INPUT - if not input: - input = T.matrix('input') - self.input = theano.External(input) - - # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) - - # PARAMETERS - self.w1 = theano.Member(T.matrix()) - if not tie_weights: - self.w2 = theano.Member(T.matrix()) - else: - self.w2 = self.w1.T - self.b1 = theano.Member(T.vector()) - self.b2 = theano.Member(T.vector()) - - # HIDDEN LAYER - self.hidden_activation = T.dot(input, self.w1) + self.b1 - self.hidden = self.build_hidden() - - # RECONSTRUCTION LAYER - self.output_activation = T.dot(self.hidden, self.w2) + self.b2 - self.output = self.build_output() - - # RECONSTRUCTION COST - self.reconstruction_cost = self.build_reconstruction_cost() - - # REGULARIZATION COST - self.regularization = self.build_regularization() - - # TOTAL COST - self.cost = self.reconstruction_cost - if self.regularize: - self.cost = self.cost + self.regularization - - # GRADIENTS AND UPDATES - if self.tie_weights: - 
self.params = self.w1, self.b1, self.b2 - else: - self.params = self.w1, self.w2, self.b1, self.b2 - gradients = T.grad(self.cost, self.params) - updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) - - # INTERFACE METHODS - self.update = theano.Method(input, self.cost, updates) - self.reconstruction = theano.Method(input, self.output) - self.representation = theano.Method(input, self.hidden) - - def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): - if (input_size is None) ^ (hidden_size is None): - raise ValueError("Must specify hidden_size and target_size or neither.") - super(AutoEncoder, self)._instance_initialize(obj, **init) - if seed is not None: - R = N.random.RandomState(seed) - else: - R = N.random - if input_size is not None: - sz = (input_size, hidden_size) - range = 1/N.sqrt(input_size) - obj.w1 = R.uniform(size = sz, low = -range, high = range) - if not self.tie_weights: - obj.w2 = R.uniform(size = list(reversed(sz)), low = -range, high = range) - obj.b1 = N.zeros(hidden_size) - obj.b2 = N.zeros(input_size) - - def build_regularization(self): - return T.zero() # no regularization! - - -class SigmoidXEAutoEncoder(AutoEncoder): - - def build_hidden(self): - return NN.sigmoid(self.hidden_activation) - - def build_output(self): - return NN.sigmoid(self.output_activation) - - def build_reconstruction_cost(self): - self.reconstruction_cost_matrix = self.input * T.log(self.output) + (1.0 - self.input) * T.log(1.0 - self.output) - self.reconstruction_costs = -T.sum(self.reconstruction_cost_matrix, axis=1) - return T.sum(self.reconstruction_costs) - - def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) - if self.tie_weights: - return self.l2_coef * T.sum(self.w1 * self.w1) - else: - return self.l2_coef * T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2) - - def _instance_initialize(self, obj, input_size = None, hidden_size = None, **init): - init.setdefault('l2_coef', 0) - super(SigmoidXEAutoEncoder, self)._instance_initialize(obj, input_size, hidden_size, **init) diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/daa.py --- a/algorithms/daa.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,186 +0,0 @@ - -import theano -from theano import tensor as T -from theano.tensor import nnet as NN -import numpy as N - -from pylearn import cost as cost - -class DenoisingAA(T.RModule): - """De-noising Auto-encoder - - WRITEME - - Abstract base class. Requires subclass with functions: - - - build_corrupted_input() - - Introductory article about this model WRITEME. 
- - - """ - - def __init__(self, input = None, regularize = True, tie_weights = True, - activation_function=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy): - """ - :param input: WRITEME - - :param regularize: WRITEME - - :param tie_weights: WRITEME - - :param activation_function: WRITEME - - :param reconstruction_cost: Should return one cost per example (row) - - :todo: Default noise level for all daa levels - - """ - super(DenoisingAA, self).__init__() - - # MODEL CONFIGURATION - self.regularize = regularize - self.tie_weights = tie_weights - self.activation_function = activation_function - self.reconstruction_cost_function = reconstruction_cost_function - - # ACQUIRE/MAKE INPUT - if not input: - input = T.matrix('input') - self.input = theano.External(input) - - # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) - - # PARAMETERS - self.w1 = theano.Member(T.matrix()) - if not tie_weights: - self.w2 = theano.Member(T.matrix()) - else: - self.w2 = self.w1.T - self.b1 = theano.Member(T.vector()) - self.b2 = theano.Member(T.vector()) - - - # REGULARIZATION COST - self.regularization = self.build_regularization() - - - ### NOISELESS ### - - # HIDDEN LAYER - self.hidden_activation = T.dot(self.input, self.w1) + self.b1 - self.hidden = self.hid_activation_function(self.hidden_activation) - - # RECONSTRUCTION LAYER - self.output_activation = T.dot(self.hidden, self.w2) + self.b2 - self.output = self.out_activation_function(self.output_activation) - - # RECONSTRUCTION COST - self.reconstruction_costs = self.build_reconstruction_costs(self.output) - self.reconstruction_cost = T.mean(self.reconstruction_costs) - - # TOTAL COST - self.cost = self.reconstruction_cost - if self.regularize: - self.cost = self.cost + self.regularization - - - ### WITH NOISE ### - self.corrupted_input = self.build_corrupted_input() - - # HIDDEN LAYER - self.nhidden_activation = T.dot(self.corrupted_input, self.w1) + self.b1 - self.nhidden = self.hid_activation_function(self.nhidden_activation) - - # RECONSTRUCTION LAYER - self.noutput_activation = T.dot(self.nhidden, self.w2) + self.b2 - self.noutput = self.out_activation_function(self.noutput_activation) - - # RECONSTRUCTION COST - self.nreconstruction_costs = self.build_reconstruction_costs(self.noutput) - self.nreconstruction_cost = T.mean(self.nreconstruction_costs) - - # TOTAL COST - self.ncost = self.nreconstruction_cost - if self.regularize: - self.ncost = self.ncost + self.regularization - - - # GRADIENTS AND UPDATES - if self.tie_weights: - self.params = self.w1, self.b1, self.b2 - else: - self.params = self.w1, self.w2, self.b1, self.b2 - gradients = T.grad(self.ncost, self.params) - updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) - - # INTERFACE METHODS - self.update = theano.Method(self.input, self.ncost, updates) - self.compute_cost = theano.Method(self.input, self.cost) - self.noisify = theano.Method(self.input, self.corrupted_input) - self.reconstruction = theano.Method(self.input, self.output) - self.representation = theano.Method(self.input, self.hidden) - self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput]) - - self.validate = theano.Method(self.input, [self.cost, self.output]) - - def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): - if (input_size is None) ^ (hidden_size is None): - raise ValueError("Must specify input_size and hidden_size or neither.") - super(DenoisingAA, self)._instance_initialize(obj, **init) 
- if seed is not None: - R = N.random.RandomState(seed) - else: - R = N.random - if input_size is not None: - sz = (input_size, hidden_size) - inf = 1/N.sqrt(input_size) - hif = 1/N.sqrt(hidden_size) - obj.w1 = R.uniform(size = sz, low = -inf, high = inf) - if not self.tie_weights: - obj.w2 = R.uniform(size = list(reversed(sz)), low = -hif, high = hif) - obj.b1 = N.zeros(hidden_size) - obj.b2 = N.zeros(input_size) - if seed is not None: - obj.seed(seed) - obj.__hide__ = ['params'] - - def build_regularization(self): - """ - @todo: Why do we need this function? - """ - return T.zero() # no regularization! - - -class SigmoidXEDenoisingAA(DenoisingAA): - """ - @todo: Merge this into the above. - @todo: Default noise level for all daa levels - """ - - def build_corrupted_input(self): - self.noise_level = theano.Member(T.scalar()) - return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input - - def hid_activation_function(self, activation): - return self.activation_function(activation) - - def out_activation_function(self, activation): - return self.activation_function(activation) - - def build_reconstruction_costs(self, output): - return self.reconstruction_cost_function(self.input, output) - - def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) - if self.tie_weights: - return self.l2_coef * T.sum(self.w1 * self.w1) - else: - return self.l2_coef * (T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2)) - - def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): - init.setdefault('noise_level', 0) - init.setdefault('l2_coef', 0) - super(SigmoidXEDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, **init) - diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/layer.py --- a/algorithms/layer.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -""" -@todo: Make a layer class, with standardized names: - input, cost, lr, and update -(a Method called update, to be more precise, whose first argument is the input) - -input_dimension, output_dimension (aliased as nin and nout) - -Modules like pylearn.algorithms.logistic_regression.Module_Nclass and -pylearn.algorithms.???.Bin_Regressor should inherit from Layer and -Stacker should assume Layer. -""" diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/logistic_regression.py --- a/algorithms/logistic_regression.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -import theano -from theano import tensor as T -from theano.tensor import nnet -from theano.compile import module -from theano import printing, pprint -from theano import compile - -import numpy as N - -class LogRegInstanceType(module.FancyModuleInstance): - def initialize(self, n_in, n_out, lr, seed): - #self.component is the LogisticRegressionTemplate instance that built this guy. - """ - @todo: Remove seed. Used only to keep Stacker happy. 
- """ - - self.w = N.zeros((n_in, n_out)) - self.b = N.zeros(n_out) - self.lr = lr - self.__hide__ = ['params'] - self.input_dimension = n_in - self.output_dimension = n_out - -class Module_Nclass(module.FancyModule): - InstanceType = LogRegInstanceType - - def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False): - super(Module_Nclass, self).__init__() #boilerplate - - self.x = module.Member(x) if x is not None else T.matrix('input') - self.targ = module.Member(targ) if targ is not None else T.lvector() - - self.w = module.Member(w) if w is not None else module.Member(T.dmatrix()) - self.b = module.Member(b) if b is not None else module.Member(T.dvector()) - self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar()) - - self.params = [p for p in [self.w, self.b] if p.owner is None] - - linear_output = T.dot(self.x, self.w) + self.b - - (xent, softmax, max_pr, argmax) = nnet.crossentropy_softmax_max_and_argmax_1hot( - linear_output, self.targ) - sum_xent = T.sum(xent) - - self.softmax = softmax - self.argmax = argmax - self.max_pr = max_pr - self.sum_xent = sum_xent - - # Softmax being computed directly. - softmax_unsupervised = nnet.softmax(linear_output) - self.softmax_unsupervised = softmax_unsupervised - - #compatibility with current implementation of stacker/daa or something - #TODO: remove this, make a wrapper - self.cost = self.sum_xent - self.input = self.x - # TODO: I want to make output = linear_output. - self.output = self.softmax_unsupervised - - #define the apply method - self.pred = T.argmax(linear_output, axis=1) - self.apply = module.Method([self.input], self.pred) - - self.validate = module.Method([self.input, self.targ], [self.cost, self.argmax, self.max_pr]) - self.softmax_output = module.Method([self.input], self.softmax_unsupervised) - - if self.params: - gparams = T.grad(sum_xent, self.params) - - self.update = module.Method([self.input, self.targ], sum_xent, - updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams))) - -class Module(module.FancyModule): - InstanceType = LogRegInstanceType - - def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False): - super(Module, self).__init__() #boilerplate - - self.input = module.Member(input) if input is not None else T.matrix('input') - self.targ = module.Member(targ) if targ is not None else T.lcol() - - self.w = module.Member(w) if w is not None else module.Member(T.dmatrix()) - self.b = module.Member(b) if b is not None else module.Member(T.dvector()) - self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar()) - - self.params = [p for p in [self.w, self.b] if p.owner is None] - - output = nnet.sigmoid(T.dot(self.x, self.w) + self.b) - xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output) - sum_xent = T.sum(xent) - - self.output = output - self.xent = xent - self.sum_xent = sum_xent - self.cost = sum_xent - - #define the apply method - self.pred = (T.dot(self.input, self.w) + self.b) > 0.0 - self.apply = module.Method([self.input], self.pred) - - #if this module has any internal parameters, define an update function for them - if self.params: - gparams = T.grad(sum_xent, self.params) - self.update = module.Method([self.input, self.targ], sum_xent, - updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams))) - -class Learner(object): - """TODO: Encapsulate the algorithm for finding an optimal regularization coefficient""" - pass - diff -r 27b1344a57b1 -r 8fff4bc26f4c 
algorithms/regressor.py --- a/algorithms/regressor.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ - -import theano -from theano import tensor as T -from theano.tensor import nnet as NN -import numpy as N - -class Regressor(theano.FancyModule): - - def __init__(self, input = None, target = None, regularize = True): - super(Regressor, self).__init__() - - # MODEL CONFIGURATION - self.regularize = regularize - - # ACQUIRE/MAKE INPUT AND TARGET - self.input = theano.External(input) if input else T.matrix('input') - self.target = theano.External(target) if target else T.matrix('target') - - # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) - - # PARAMETERS - self.w = theano.Member(T.matrix()) - self.b = theano.Member(T.vector()) - - # OUTPUT - self.output_activation = T.dot(self.input, self.w) + self.b - self.output = self.build_output() - - # REGRESSION COST - self.regression_cost = self.build_regression_cost() - - # REGULARIZATION COST - self.regularization = self.build_regularization() - - # TOTAL COST - self.cost = self.regression_cost - if self.regularize: - self.cost = self.cost + self.regularization - - # GRADIENTS AND UPDATES - self.params = self.w, self.b - gradients = T.grad(self.cost, self.params) - updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) - - # INTERFACE METHODS - self.update = theano.Method([self.input, self.target], self.cost, updates) - self.get_cost = theano.Method([self.input, self.target], self.cost) - self.predict = theano.Method(self.input, self.output) - - self.build_extensions() - - def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init): - if seed is not None: - R = N.random.RandomState(seed) - else: - R = N.random - if (input_size is None) ^ (output_size is None): - raise ValueError("Must specify input_size and output_size or neither.") - super(Regressor, self)._instance_initialize(obj, **init) - if input_size is not None: - sz = (input_size, output_size) - range = 1/N.sqrt(input_size) - obj.w = R.uniform(size = sz, low = -range, high = range) - obj.b = N.zeros(output_size) - obj.__hide__ = ['params'] - - def _instance_flops_approx(self, obj): - return obj.w.size - - def build_extensions(self): - pass - - def build_output(self): - raise NotImplementedError('override in subclass') - - def build_regression_cost(self): - raise NotImplementedError('override in subclass') - - def build_regularization(self): - return T.zero() # no regularization! 
- - -class BinRegressor(Regressor): - - def build_extensions(self): - self.classes = T.iround(self.output) - self.classify = theano.Method(self.input, self.classes) - - def build_output(self): - return NN.sigmoid(self.output_activation) - - def build_regression_cost(self): - self.regression_cost_matrix = self.target * T.log(self.output) + (1.0 - self.target) * T.log(1.0 - self.output) - self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1) - return T.mean(self.regression_costs) - - def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) - return self.l2_coef * T.sum(self.w * self.w) - - def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init): - init.setdefault('l2_coef', 0) - super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init) diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/sgd.py --- a/algorithms/sgd.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ - -from theano.compile import module -from theano import tensor as T - -class StochasticGradientDescent(module.FancyModule): - def __init__(self, params, gparams, lr=None): - super(StochasticGradientDescent, self).__init__() - - self.lr = lr if lr is not None else module.Member(T.dscalar()) - self.params = params - self.gparams = gparams - - self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gparams)) - diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/stacker.py --- a/algorithms/stacker.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ - -# for example in examples: -# repr = example -# for layer in stacked.layers: -# layer.update(repr) -# repr = layer.representation(repr) - -import theano -from theano import tensor as T -import sys -import numpy as N - -class Stacker(T.RModule): - """ - @note: Assumes some names in the layers: input, cost, lr, and update - @todo: Maybe compile functions on demand, rather than immediately. - """ - - def __init__(self, submodules, input = None, regularize = False): - super(Stacker, self).__init__() - - current = input - layers = [] - for i, (submodule, outname) in enumerate(submodules): - layer = submodule(current, regularize = regularize) - layers.append(layer) - current = layer[outname] - self.layers = layers - - self.input = self.layers[0].input - self.output = current - - representation = [] - local_update = [] - global_update = [] - to_update = [] - all_kits = [] - for layer, (submodule, outname) in zip(layers, submodules): - u = layer.update - u.resolve_all() - to_update += u.updates.keys() - all_kits += u.kits - # the input is the whole deep model's input instead of the layer's own - # input (which is previous_layer[outname]) - inputs = [self.input] + u.inputs[1:] - method = theano.Method(inputs, u.outputs, u.updates, u.kits) - local_update.append(method) - global_update.append( - theano.Method(inputs, - u.outputs, - # we update the params of the previous layers too but wrt - # this layer's cost - dict((param, param - layer.lr * T.grad(layer.cost, param)) - for param in to_update), - list(all_kits))) - representation.append(theano.Method(self.input, layer[outname])) - -# @todo: Add diagnostics -# self.diagnose_from_input = Method([self.input], self.layers[0].diagnose.outputs + self.layers[1].diagnose.outputs ... 
- - self.local_update = local_update - self.global_update = global_update - self.representation = representation - self.update = self.global_update[-1] - self.compute = theano.Method(self.input, self.output) - ll = self.layers[-1] - for name, method in ll.components_map(): - if isinstance(method, theano.Method) and not hasattr(self, name): - m = method.dup() - m.resolve_all() - m.inputs = [self.input if x is ll.input else x for x in m.inputs] - setattr(self, name, m) - - def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs): - super(Stacker, self)._instance_initialize(obj, **kwargs) - if seed is not None: - R = N.random.RandomState(seed) - else: - R = N.random - for layer in obj.layers: - if layer.lr is None: - layer.lr = lr - if nunits: - obj.input_dimension = nunits[0] - obj.output_dimension = nunits[-1] - if len(nunits) != len(obj.layers) + 1: - raise ValueError('You should give exactly one more unit numbers as there are layers.') - for ni, no, layer in zip(nunits[:-1], nunits[1:], obj.layers): - if seed is not None: - layer.initialize(ni, no, seed = R.random_integers(sys.maxint - 1)) - else: - layer.initialize(ni, no) - if seed is not None: - obj.seed(seed) - - def _instance_flops_approx(self, obj): - rval = 0 - for layer in obj.layers: - rval += layer.flops_approx() - return rval - diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/tests/test_aa.py --- a/algorithms/tests/test_aa.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -#from __future__ import absolute_imports - -from pylearn import algorithms as models -import theano -import numpy -import time - - -def test_train(mode = theano.Mode('c|py', 'fast_run')): - - aa = models.SigmoidXEAutoEncoder(regularize = False) -# print aa.update.pretty(mode = theano.Mode('py', 'fast_run').excluding('inplace')) - - model = aa.make(lr = 0.01, - input_size = 100, - hidden_size = 1000, - mode = mode) - - data = [[0, 1, 0, 0, 1, 1, 1, 0, 1, 0]*10]*10 - #data = numpy.random.rand(10, 100) - - t1 = time.time() - for i in xrange(1001): - cost = model.update(data) - if i % 100 == 0: - print i, cost - t2 = time.time() - return t2 - t1 - -if __name__ == '__main__': - numpy.random.seed(10) - print 'optimized:' - t1 = test_train(theano.Mode('c|py', 'fast_run')) - print 'time:',t1 - print - - numpy.random.seed(10) - print 'not optimized:' - t2 = test_train(theano.Mode('c|py', 'fast_compile')) - print 'time:',t2 - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/tests/test_daa.py --- a/algorithms/tests/test_daa.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -#!/usr/bin/python - -from pylearn import algorithms as models -import theano -import numpy -import time - -import pylearn.algorithms.logistic_regression - -def test_train_daa(mode = theano.Mode('c|py', 'fast_run')): - - ndaa = 3 - daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(models.BinRegressor, 'output')], - regularize = False) - - model = daa.make([4, 20, 20, 20, 1], - lr = 0.01, - mode = mode, - seed = 10) - - model.layers[0].noise_level = 0.3 - model.layers[1].noise_level = 0.3 - model.layers[2].noise_level = 0.3 - - # Update the first hidden layer - for l in range(3): - for i in range(10): - model.local_update[l]([[0, 1, 0, 1]]) - model.local_update[l]([[1, 0, 1, 0]]) - - for i in range(10): - model.update([[0, 1, 0, 1]], [[1]]) - model.update([[1, 0, 1, 0]], [[0]]) - print model.classify([[0, 1, 0, 1]]) - print model.classify([[1, 0, 1, 0]]) - - 
-def test_train_daa2(mode = theano.Mode('c|py', 'fast_run')): - - ndaa = 3 - daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(pylearn.algorithms.logistic_regression.Module_Nclass, 'pred')], - regularize = False) - - model = daa.make([4] + [20] * ndaa + [10], - lr = 0.01, - mode = mode, - seed = 10) - - for l in range(ndaa): model.layers[l].noise_level = 0.3 - - instances = [([[0, 1, 0, 1]], [1]), ([[1, 0, 1, 0]], [0])] - - for l in range(ndaa): - for i in range(10): - for (input, output) in instances: - model.local_update[l](input) - - for i in range(10): - for (input, output) in instances: -# model.update(input, output) - print "OLD:", - print model.validate(input, output) - oldloss = model.update(input, output) - print oldloss - print "NEW:" - print model.validate(input, output) - print - - print model.apply([[0, 1, 0, 1]]) - print model.apply([[1, 0, 1, 0]]) - - - - -if __name__ == '__main__': -# print 'optimized:' -# t1 = test_train_daa(theano.Mode('py', 'fast_compile')) -# t1 = test_train_daa(theano.Mode('c|py', 'fast_run')) -# print 'time:',t1 -# print - -# print 'not optimized:' -# t2 = test_train_daa(theano.Mode('c|py', 'fast_compile')) -## print 'time:',t2 - -# test_train_daa(theano.compile.Mode('c&py', 'merge')) -# test_train_daa(theano.compile.Mode('c|py', 'merge')) - test_train_daa(theano.compile.Mode('py', 'merge')) - - test_train_daa2(theano.compile.Mode('c|py', 'merge')) diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/tests/test_regressor.py --- a/algorithms/tests/test_regressor.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ - - -import models -import theano -import numpy -import time - - -def test_train(mode = theano.Mode('c|py', 'fast_run')): - - reg = models.BinRegressor(regularize = False) - - model = reg.make(lr = 0.01, - input_size = 100, - mode = mode, - seed = 10) - -# data = [[0, 1, 0, 0, 1, 1, 1, 0, 1, 0]*10]*10 -# targets = [[1]]*10 - #data = numpy.random.rand(10, 100) - - R = numpy.random.RandomState(100) - t1 = time.time() - for i in xrange(1001): - data = R.random_integers(0, 1, size = (10, 100)) - targets = data[:, 6].reshape((10, 1)) - cost = model.update(data, targets) - if i % 100 == 0: - print i, '\t', cost, '\t', 1*(targets.T == model.classify(data).T) - t2 = time.time() - return t2 - t1 - -if __name__ == '__main__': - print 'optimized:' - t1 = test_train(theano.Mode('c|py', 'fast_run')) - print 'time:',t1 - print - - print 'not optimized:' - t2 = test_train(theano.Mode('c|py', 'fast_compile')) - print 'time:',t2 - - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c algorithms/tests/test_stacker.py --- a/algorithms/tests/test_stacker.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ - -import models -import theano -import numpy -import time - - -def test_train(mode = theano.Mode('c|py', 'fast_run')): - - reg = models.Stacker([(models.BinRegressor, 'output'), (models.BinRegressor, 'output')], - regularize = False) - #print reg.global_update[1].pretty(mode = mode.excluding('inplace')) - - model = reg.make([100, 200, 1], - lr = 0.01, - mode = mode, - seed = 10) - - R = numpy.random.RandomState(100) - t1 = time.time() - for i in xrange(1001): - data = R.random_integers(0, 1, size = (10, 100)) - targets = data[:, 6].reshape((10, 1)) - cost = model.update(data, targets) - if i % 100 == 0: - print i, '\t', cost, '\t', 1*(targets.T == model.classify(data).T) - t2 = time.time() - return t2 - t1 - -if __name__ == '__main__': - print 'optimized:' - t1 = 
test_train(theano.Mode('c|py', 'fast_run')) - print 'time:',t1 - print - - print 'not optimized:' - t2 = test_train(theano.Mode('c|py', 'fast_compile')) - print 'time:',t2 - - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c amat.py --- a/amat.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -"""load PLearn AMat files""" - -import sys, numpy, array - -class AMat: - """DataSource to access a plearn amat file as a periodic unrandomized stream. - - Attributes: - - input -- all columns of input - target -- all columns of target - weight -- all columns of weight - extra -- all columns of extra - - all -- the entire data contents of the amat file - n_examples -- the number of training examples in the file - - AMat stands for Ascii Matri[x,ces] - - """ - - marker_size = '#size:' - marker_sizes = '#sizes:' - marker_col_names = '#:' - - def __init__(self, path, head=None, update_interval=0, ofile=sys.stdout): - - """Load the amat at into memory. - - path - str: location of amat file - head - int: stop reading after this many data rows - update_interval - int: print '.' to ofile every lines - ofile - file: print status, msgs, etc. to this file - - """ - self.all = None - self.input = None - self.target = None - self.weight = None - self.extra = None - - self.header = False - self.header_size = None - self.header_rows = None - self.header_cols = None - self.header_sizes = None - self.header_col_names = [] - - data_started = False - data = array.array('d') - - f = open(path) - n_data_lines = 0 - len_float_line = None - - for i,line in enumerate(f): - if n_data_lines == head: - #we've read enough data, - # break even if there's more in the file - break - if len(line) == 0 or line == '\n': - continue - if line[0] == '#': - if not data_started: - #the condition means that the file has a header, and we're on - # some header line - self.header = True - if line.startswith(AMat.marker_size): - info = line[len(AMat.marker_size):] - self.header_size = [int(s) for s in info.split()] - self.header_rows, self.header_cols = self.header_size - if line.startswith(AMat.marker_col_names): - info = line[len(AMat.marker_col_names):] - self.header_col_names = info.split() - elif line.startswith(AMat.marker_sizes): - info = line[len(AMat.marker_sizes):] - self.header_sizes = [int(s) for s in info.split()] - else: - #the first non-commented line tells us that the header is done - data_started = True - float_line = [float(s) for s in line.split()] - if len_float_line is None: - len_float_line = len(float_line) - if (self.header_cols is not None) \ - and self.header_cols != len_float_line: - print >> sys.stderr, \ - 'WARNING: header declared %i cols but first line has %i, using %i',\ - self.header_cols, len_float_line, len_float_line - else: - if len_float_line != len(float_line): - raise IOError('wrong line length', i, line) - data.extend(float_line) - n_data_lines += 1 - - if update_interval > 0 and (ofile is not None) \ - and n_data_lines % update_interval == 0: - ofile.write('.') - ofile.flush() - - if update_interval > 0: - ofile.write('\n') - f.close() - - # convert from array.array to numpy.ndarray - nshape = (len(data) / len_float_line, len_float_line) - self.all = numpy.frombuffer(data).reshape(nshape) - self.n_examples = self.all.shape[0] - - # assign - if self.header_sizes is not None: - if len(self.header_sizes) > 4: - print >> sys.stderr, 'WARNING: ignoring sizes after 4th in %s' % path - leftmost = 0 - #here we make use of the fact that if header_sizes has len < 4 - # 
the loop will exit before 4 iterations - attrlist = ['input', 'target', 'weight', 'extra'] - for attr, ncols in zip(attrlist, self.header_sizes): - setattr(self, attr, self.all[:, leftmost:leftmost+ncols]) - leftmost += ncols - diff -r 27b1344a57b1 -r 8fff4bc26f4c autotest.py --- a/autotest.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -import unittest, os, sys, traceback - -def test_root_dir(debugmode=False): - suite = None - filenames = os.listdir('.') - for filename in filenames: - if filename[-3:] == '.py' and filename.startswith('_test'): - #print >>sys.stderr, 'Loading', modname - modname = filename[0:-3] - - try: - module = __import__(modname) - except Exception, e: - print >>sys.stderr, "====================================================" - print >>sys.stderr, "Failed to load %s.py" % modname - print >>sys.stderr, "====================================================" - traceback.print_exc() - print >>sys.stderr, "====================================================" - continue - - tests = unittest.TestLoader().loadTestsFromModule(module) - if tests.countTestCases() > 0: - print >>sys.stderr, 'Testing', modname - if suite is None: - suite = tests - else: - suite.addTests(tests) - if suite is None: - print >>sys.stderr, "No suite found" - sys.exit(1) - if debugmode: - suite.debug() - else: - unittest.TextTestRunner(verbosity=1).run(suite) - -if __name__ == '__main__': - - def printUsage(): - print >>sys.stderr, "Bad argument: ",sys.argv - print >>sys.stderr, "only --debug is supported" - sys.exit(1) - debugparam="" - - if len(sys.argv)==2: - if sys.argv[1]=="--debug": - debugparam="--debug" - sys.argv.remove(debugparam) - else: - printUsage() - elif len(sys.argv)>2: - printUsage() - - test_root_dir(debugparam!="") - diff -r 27b1344a57b1 -r 8fff4bc26f4c cost.py --- a/cost.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -""" -Cost functions. - -@note: All of these functions return one cost per example. So it is your -job to perform a tensor.sum over the individual example losses. - -@todo: Make a Cost class, with a particular contract. - -@todo: It would be nice to implement a hinge loss, with a particular margin. -""" - -import theano.tensor as T -from xlogx import xlogx - -def quadratic(target, output, axis=1): - return T.mean(T.sqr(target - output), axis=axis) - -def cross_entropy(target, output, axis=1): - """ - @todo: This is essentially duplicated as nnet_ops.binary_crossentropy - @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy - """ - return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis) - -def KL_divergence(target, output): - """ - @note: We do not compute the mean, because if target and output have - different shapes then the result will be garbled. 
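# Usage sketch for the cost functions above: each returns one cost per example, so the
# caller reduces over examples (e.g. with tensor.sum) before building an update rule.
# The symbolic variables and the import path are assumptions for illustration only.
import theano.tensor as T
from cost import cross_entropy
target = T.matrix('target')
output = T.matrix('output')
per_example_cost = cross_entropy(target, output)  # one value per example (row)
total_cost = T.sum(per_example_cost)              # the reduction is left to the caller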
- """ - return -(target * T.log(output) + (1 - target) * T.log(1 - output)) \ - + (xlogx(target) + xlogx(1 - target)) -# return cross_entropy(target, output, axis) - cross_entropy(target, target, axis) diff -r 27b1344a57b1 -r 8fff4bc26f4c dataset.py --- a/dataset.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1533 +0,0 @@ - -from lookup_list import LookupList as Example -from common.misc import unique_elements_list_intersection -from string import join -from sys import maxint -import numpy, copy - -from exceptions import * - -class AttributesHolder(object): - def __init__(self): pass - - def attributeNames(self): - raise AbstractFunction() - - def setAttributes(self,attribute_names,attribute_values,make_copies=False): - """ - Allow the attribute_values to not be a list (but a single value) if the attribute_names is of length 1. - """ - if len(attribute_names)==1 and not (isinstance(attribute_values,list) or isinstance(attribute_values,tuple) ): - attribute_values = [attribute_values] - if make_copies: - for name,value in zip(attribute_names,attribute_values): - self.__setattr__(name,copy.deepcopy(value)) - else: - for name,value in zip(attribute_names,attribute_values): - self.__setattr__(name,value) - - def getAttributes(self,attribute_names=None, return_copy=False): - """ - Return all (if attribute_names=None, in the order of attributeNames()) or a specified subset of attributes. - """ - if attribute_names is None: - attribute_names = self.attributeNames() - if return_copy: - return [copy.copy(self.__getattribute__(name)) for name in attribute_names] - else: - return [self.__getattribute__(name) for name in attribute_names] - -class DataSet(AttributesHolder): - """A virtual base class for datasets. - - A DataSet can be seen as a generalization of a matrix, meant to be used in conjunction - with learning algorithms (for training and testing them): rows/records are called examples, and - columns/attributes are called fields. The field value for a particular example can be an arbitrary - python object, which depends on the particular dataset. - - We call a DataSet a 'stream' when its length is unbounded (in which case its __len__ method - should return sys.maxint). - - A DataSet is a generator of iterators; these iterators can run through the - examples or the fields in a variety of ways. A DataSet need not necessarily have a finite - or known length, so this class can be used to interface to a 'stream' which - feeds on-line learning (however, as noted below, some operations are not - feasible or not recommended on streams). - - To iterate over examples, there are several possibilities: - - for example in dataset: - - for val1,val2,... in dataset: - - for example in dataset(field1, field2,field3, ...): - - for val1,val2,val3 in dataset(field1, field2,field3): - - for minibatch in dataset.minibatches([field1, field2, ...],minibatch_size=N): - - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N): - Each of these is documented below. All of these iterators are expected - to provide, in addition to the usual 'next()' method, a 'next_index()' method - which returns a non-negative integer pointing to the position of the next - example that will be returned by 'next()' (or of the first example in the - next minibatch returned). 
This is important because these iterators - can wrap around the dataset in order to do multiple passes through it, - in possibly unregular ways if the minibatch size is not a divisor of the - dataset length. - - To iterate over fields, one can do - - for field in dataset.fields(): - for field_value in field: # iterate over the values associated to that field for all the dataset examples - - for field in dataset(field1,field2,...).fields() to select a subset of fields - - for field in dataset.fields(field1,field2,...) to select a subset of fields - and each of these fields is iterable over the examples: - - for field_examples in dataset.fields(): - for example_value in field_examples: - ... - but when the dataset is a stream (unbounded length), it is not recommended to do - such things because the underlying dataset may refuse to access the different fields in - an unsynchronized ways. Hence the fields() method is illegal for streams, by default. - The result of fields() is a L{DataSetFields} object, which iterates over fields, - and whose elements are iterable over examples. A DataSetFields object can - be turned back into a DataSet with its examples() method:: - dataset2 = dataset1.fields().examples() - and dataset2 should behave exactly like dataset1 (in fact by default dataset2==dataset1). - - Note: Fields are not mutually exclusive, i.e. two fields can overlap in their actual content. - - Note: The content of a field can be of any type. Field values can also be 'missing' - (e.g. to handle semi-supervised learning), and in the case of numeric (numpy array) - fields (i.e. an ArrayFieldsDataSet), NaN plays the role of a missing value. - What about non-numeric values? None. - - Dataset elements can be indexed and sub-datasets (with a subset - of examples) can be extracted. These operations are not supported - by default in the case of streams. - - - dataset[:n] returns an Example with the n first examples. - - - dataset[i1:i2:s] returns an Example with the examples i1,i1+s,...i2-s. - - - dataset[i] returns an Example. - - - dataset[[i1,i2,...in]] returns an Example with examples i1,i2,...in. - - A similar command gives you a DataSet instead of Examples : - - - dataset.subset[:n] returns a DataSet with the n first examples. - - - dataset.subset[i1:i2:s] returns a DataSet with the examples i1,i1+s,...i2-s. - - - dataset.subset[i] returns a DataSet. - - - dataset.subset[[i1,i2,...in]] returns a DataSet with examples i1,i2,...in. - - - - dataset. returns the value of a property associated with - the name . The following properties should be supported: - - 'description': a textual description or name for the dataset - - 'fieldtypes': a list of types (one per field) - A DataSet may have other attributes that it makes visible to other objects. These are - used to store information that is not example-wise but global to the dataset. - The list of names of these attributes is given by the attribute_names() method. - - Datasets can be concatenated either vertically (increasing the length) or - horizontally (augmenting the set of fields), if they are compatible, using - the following operations (with the same basic semantics as numpy.hstack - and numpy.vstack): - - - dataset1 | dataset2 | dataset3 == dataset.hstack([dataset1,dataset2,dataset3]) - - creates a new dataset whose list of fields is the concatenation of the list of - fields of the argument datasets. This only works if they all have the same length. 
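# Sketch of the indexing behaviour described above, assuming ds is a finite DataSet such
# as the ArrayDataSet in the previous sketch: plain indexing returns Example/LookupList
# objects, while .subset wraps the same rows as a DataSet again.
one_example = ds[5]           # a single Example
first_four  = ds[0:4]         # a LookupList over examples 0..3
picked      = ds[[1, 3, 7]]   # examples 1, 3 and 7
sub_dataset = ds.subset[0:4]  # same examples, returned as a DataSet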
- - - dataset1 & dataset2 & dataset3 == dataset.vstack([dataset1,dataset2,dataset3]) - - creates a new dataset that concatenates the examples from the argument datasets - (and whose length is the sum of the length of the argument datasets). This only - works if they all have the same fields. - - According to the same logic, and viewing a DataSetFields object associated to - a DataSet as a kind of transpose of it, fields1 & fields2 concatenates fields of - a DataSetFields fields1 and fields2, and fields1 | fields2 concatenates their - examples. - - A dataset can hold arbitrary key-value pairs that may be used to access meta-data - or other properties of the dataset or associated with the dataset or the result - of a computation stored in a dataset. These can be accessed through the [key] syntax - when key is a string (or more specifically, neither an integer, a slice, nor a list). - - A DataSet sub-class should always redefine the following methods: - - __len__ if it is not a stream - - fieldNames - - minibatches_nowrap (called by DataSet.minibatches()) - For efficiency of implementation, a sub-class might also want to redefine - - valuesHStack - - valuesVStack - - hasFields - - __getitem__ may not be feasible with some streams - - __iter__ - A sub-class should also append attributes to self._attribute_names - (the default value returned by attributeNames()). - By convention, attributes not in attributeNames() should have a name - starting with an underscore. - @todo enforce/test that convention! - """ - - numpy_vstack = lambda fieldname,values: numpy.vstack(values) - numpy_hstack = lambda fieldnames,values: numpy.hstack(values) - - def __init__(self, description=None, fieldnames=None, fieldtypes=None): - """ - @type fieldnames: list of strings - @type fieldtypes: list of python types, same length as fieldnames - @type description: string - @param description: description/name for this dataset - """ - def default_desc(): - return type(self).__name__ \ - + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" - - #self.fieldnames = fieldnames - - self.fieldtypes = fieldtypes if fieldtypes is not None \ - else [None]*1 #len(fieldnames) - - self.description = default_desc() if description is None \ - else description - self._attribute_names = ["description"] - - - attributeNames = property(lambda self: copy.copy(self._attribute_names)) - - def __contains__(self, fieldname): - return (fieldname in self.fieldNames()) \ - or (fieldname in self.attributeNames()) - - def __iter__(self): - """Supports the syntax "for i in dataset: ..." - - Using this syntax, "i" will be an Example instance (or equivalent) with - all the fields of DataSet self. Every field of "i" will give access to - a field of a single example. Fields should be accessible via - i["fielname"] or i[3] (in the order defined by the elements of the - Example returned by this iterator), but the derived class is free - to accept any type of identifier, and add extra functionality to the iterator. - - The default implementation calls the minibatches iterator and extracts the first example of each field. - """ - return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) - - def __len__(self): - """ - len(dataset) returns the number of examples in the dataset. - By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). - Sub-classes which implement finite-length datasets should redefine this method. - Some methods only make sense for finite-length datasets. 
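# Sketch of the concatenation operators from the class docstring: | concatenates fields
# (horizontal stacking) and & concatenates examples (vertical stacking).  ds1, ds2 and
# ds3 are hypothetical datasets with compatible lengths/fields.
wider  = ds1 | ds2                        # same length, union of the fields
longer = ds1 & ds3                        # same fields, examples of ds1 then of ds3
assert len(wider) == len(ds1)
assert len(longer) == len(ds1) + len(ds3)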
- """ - from sys import maxint - return maxint - - - class MinibatchToSingleExampleIterator(object): - """ - Converts the result of minibatch iterator with minibatch_size==1 into - single-example values in the result. Therefore the result of - iterating on the dataset itself gives a sequence of single examples - (whereas the result of iterating over minibatches gives in each - Example field an iterable object over the individual examples in - the minibatch). - """ - def __init__(self, minibatch_iterator): - self.minibatch_iterator = minibatch_iterator - self.minibatch = None - def __iter__(self): #makes for loop work - return self - def next(self): - size1_minibatch = self.minibatch_iterator.next() - if not self.minibatch: - names = size1_minibatch.keys() - # next lines are a hack, but there was problem when we were getting [array(327)] for instance - try: - values = [value[0] for value in size1_minibatch.values()] - except : - values = [value for value in size1_minibatch.values()] - self.minibatch = Example(names,values) - else: - self.minibatch._values = [value[0] for value in size1_minibatch.values()] - return self.minibatch - - def next_index(self): - return self.minibatch_iterator.next_index() - - class MinibatchWrapAroundIterator(object): - """ - An iterator for minibatches that handles the case where we need to wrap around the - dataset because n_batches*minibatch_size > len(dataset). It is constructed from - a dataset that provides a minibatch iterator that does not need to handle that problem. - This class is a utility for dataset subclass writers, so that they do not have to handle - this issue multiple times, nor check that fieldnames are valid, nor handle the - empty fieldnames (meaning 'use all the fields'). - """ - def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): - self.dataset=dataset - self.fieldnames=fieldnames - self.minibatch_size=minibatch_size - self.n_batches=n_batches - self.n_batches_done=0 - self.next_row=offset - self.L=len(dataset) - self.offset=offset % self.L - ds_nbatches = (self.L-self.next_row)/self.minibatch_size - if n_batches is not None: - ds_nbatches = min(n_batches,ds_nbatches) - if fieldnames: - assert dataset.hasFields(*fieldnames) - else: - self.fieldnames=dataset.fieldNames() - self.iterator = self.dataset.minibatches_nowrap(self.fieldnames,self.minibatch_size, ds_nbatches,self.next_row) - - def __iter__(self): - return self - - def next_index(self): - return self.next_row - - def next(self): - if self.n_batches and self.n_batches_done==self.n_batches: - raise StopIteration - elif not self.n_batches and self.next_row ==self.L: - raise StopIteration - upper = self.next_row+self.minibatch_size - if upper <=self.L: - minibatch = self.iterator.next() - else: - if not self.n_batches: - upper=min(upper, self.L) - # if their is not a fixed number of batch, we continue to the end of the dataset. 
- # this can create a minibatch that is smaller then the minibatch_size - assert (self.L-self.next_row)<=self.minibatch_size - minibatch = self.dataset.minibatches_nowrap(self.fieldnames,self.L-self.next_row,1,self.next_row).next() - else: - # we must concatenate (vstack) the bottom and top parts of our minibatch - # first get the beginning of our minibatch (top of dataset) - first_part = self.dataset.minibatches_nowrap(self.fieldnames,self.L-self.next_row,1,self.next_row).next() - second_part = self.dataset.minibatches_nowrap(self.fieldnames,upper-self.L,1,0).next() - minibatch = Example(self.fieldnames, - [self.dataset.valuesVStack(name,[first_part[name],second_part[name]]) - for name in self.fieldnames]) - self.next_row=upper - self.n_batches_done+=1 - if upper >= self.L and self.n_batches: - self.next_row -= self.L - ds_nbatches = (self.L-self.next_row)/self.minibatch_size - if self.n_batches is not None: - ds_nbatches = min(self.n_batches,ds_nbatches) - self.iterator = self.dataset.minibatches_nowrap(self.fieldnames,self.minibatch_size, - ds_nbatches,self.next_row) - return DataSetFields(MinibatchDataSet(minibatch,self.dataset.valuesVStack, - self.dataset.valuesHStack), - minibatch.keys()) - - - minibatches_fieldnames = None - minibatches_minibatch_size = 1 - minibatches_n_batches = None - def minibatches(self, - fieldnames = minibatches_fieldnames, - minibatch_size = minibatches_minibatch_size, - n_batches = minibatches_n_batches, - offset = 0): - """ - Return an iterator that supports three forms of syntax: - - for i in dataset.minibatches(None,**kwargs): ... - - for i in dataset.minibatches([f1, f2, f3],**kwargs): ... - - for i1, i2, i3 in dataset.minibatches([f1, f2, f3],**kwargs): ... - - Using the first two syntaxes, "i" will be an indexable object, such as a list, - tuple, or Example instance. In both cases, i[k] is a list-like container - of a batch of current examples. In the second case, i[0] is - list-like container of the f1 field of a batch current examples, i[1] is - a list-like container of the f2 field, etc. - - Using the first syntax, all the fields will be returned in "i". - Using the third syntax, i1, i2, i3 will be list-like containers of the - f1, f2, and f3 fields of a batch of examples on each loop iteration. - - The minibatches iterator is expected to return upon each call to next() - a DataSetFields object, which is a Example (indexed by the field names) whose - elements are iterable and indexable over the minibatch examples, and which keeps a pointer to - a sub-dataset that can be used to iterate over the individual examples - in the minibatch. Hence a minibatch can be converted back to a regular - dataset or its fields can be looked at individually (and possibly iterated over). - - PARAMETERS - - fieldnames (list of any type, default None): - The loop variables i1, i2, i3 (in the example above) should contain the - f1, f2, and f3 fields of the current batch of examples. If None, the - derived class can choose a default, e.g. all fields. - - - minibatch_size (integer, default 1) - On every iteration, the variables i1, i2, i3 will have - exactly minibatch_size elements. e.g. len(i1) == minibatch_size - - @DEPRECATED n_batches : not used anywhere - - n_batches (integer, default None) - The iterator will loop exactly this many times, and then stop. If None, - the derived class can choose a default. If (-1), then the returned - iterator should support looping indefinitely. 
- - - offset (integer, default 0) - The iterator will start at example 'offset' in the dataset, rather than the default. - - Note: A list-like container is something like a tuple, list, numpy.ndarray or - any other object that supports integer indexing and slicing. - - @ATTENTION: now minibatches returns minibatches_nowrap, which is supposed to return complete - batches only, raise StopIteration. - @ATTENTION: minibatches returns a LookupList, we can't iterate over examples on it. - - """ - #return DataSet.MinibatchWrapAroundIterator(self, fieldnames, minibatch_size, n_batches,offset) - assert offset >= 0 - assert offset < len(self) - assert offset + minibatch_size -1 < len(self) - if fieldnames == None : - fieldnames = self.fieldNames() - return self.minibatches_nowrap(fieldnames,minibatch_size,n_batches,offset) - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - """ - This is the minibatches iterator generator that sub-classes must define. - It does not need to worry about wrapping around multiple times across the dataset, - as this is handled by MinibatchWrapAroundIterator when DataSet.minibatches() is called. - The next() method of the returned iterator does not even need to worry about - the termination condition (as StopIteration will be raised by DataSet.minibatches - before an improper call to minibatches_nowrap's next() is made). - That next() method can assert that its next row will always be within [0,len(dataset)). - The iterator returned by minibatches_nowrap does not need to implement - a next_index() method either, as this will be provided by MinibatchWrapAroundIterator. - """ - raise AbstractFunction() - - def is_unbounded(self): - """ - Tests whether a dataset is unbounded (e.g. a stream). - """ - return len(self)==maxint - - def hasFields(self,*fieldnames): - """ - Return true if the given field name (or field names, if multiple arguments are - given) is recognized by the DataSet (i.e. can be used as a field name in one - of the iterators). - - The default implementation may be inefficient (O(# fields in dataset)), as it calls the fieldNames() - method. Many datasets may store their field names in a dictionary, which would allow more efficiency. - """ - return len(unique_elements_list_intersection(fieldnames,self.fieldNames()))>0 - - def fieldNames(self): - """ - Return the list of field names that are supported by the iterators, - and for which hasFields(fieldname) would return True. - """ - raise AbstractFunction() - - def __call__(self,*fieldnames): - """ - Return a dataset that sees only the fields whose name are specified. - """ - assert self.hasFields(*fieldnames) - #return self.fields(*fieldnames).examples() - fieldnames_list = list(fieldnames) - return FieldsSubsetDataSet(self,fieldnames_list) - - def cached_fields_subset(self,*fieldnames) : - """ - Behaviour is supposed to be the same as __call__(*fieldnames), but the dataset returned is cached. - @see : dataset.__call__ - """ - assert self.hasFields(*fieldnames) - return self.fields(*fieldnames).examples() - - def fields(self,*fieldnames): - """ - Return a DataSetFields object associated with this dataset. - """ - return DataSetFields(self,fieldnames) - - def getitem_key(self, fieldname): - """A not-so-well thought-out place to put code that used to be in - getitem. - """ - #removing as per discussion June 4. 
--JSB - - i = fieldname - # else check for a fieldname - if self.hasFields(i): - return self.minibatches(fieldnames=[i],minibatch_size=len(self),n_batches=1,offset=0).next()[0] - # else we are trying to access a property of the dataset - assert i in self.__dict__ # else it means we are trying to access a non-existing property - return self.__dict__[i] - - def __getitem__(self,i): - """ - @rtype: Example - @returns: single or multiple examples - - @type i: integer or slice or of integers - @param i: - dataset[i] returns the (i+1)-th example of the dataset. - dataset[i:j] returns a LookupList with examples i,i+1,...,j-1. - dataset[i:j:s] returns a LookupList with examples i,i+2,i+4...,j-2. - dataset[[i1,i2,..,in]] returns a LookupList with examples i1,i2,...,in. - - @note: - Some stream datasets may be unable to implement random access, i.e. - arbitrary slicing/indexing because they can only iterate through - examples one or a minibatch at a time and do not actually store or keep - past (or future) examples. - - The default implementation of getitem uses the minibatches iterator - to obtain one example, one slice, or a list of examples. It may not - always be the most efficient way to obtain the result, especially if - the data are actually stored in a memory array. - """ - - if type(i) is int: - assert i >= 0 # TBM: see if someone complains and want negative i - if i >= len(self) : - raise IndexError - i_batch = self.minibatches_nowrap(self.fieldNames(), - minibatch_size=1, n_batches=1, offset=i) - return DataSet.MinibatchToSingleExampleIterator(i_batch).next() - - #if i is a contiguous slice - if type(i) is slice and (i.step in (None, 1)): - offset = 0 if i.start is None else i.start - upper_bound = len(self) if i.stop is None else i.stop - upper_bound = min(len(self) , upper_bound) - #return MinibatchDataSet(self.minibatches_nowrap(self.fieldNames(), - # minibatch_size=upper_bound - offset, - # n_batches=1, - # offset=offset).next()) - # now returns a LookupList - return self.minibatches_nowrap(self.fieldNames(), - minibatch_size=upper_bound - offset, - n_batches=1, - offset=offset).next() - - # if slice has a step param, convert it to list and handle it with the - # list code - if type(i) is slice: - offset = 0 if i.start is None else i.start - upper_bound = len(self) if i.stop is None else i.stop - upper_bound = min(len(self) , upper_bound) - i = list(range(offset, upper_bound, i.step)) - - # handle tuples, arrays, lists - if hasattr(i, '__getitem__'): - for idx in i: - #dis-allow nested slices - if not isinstance(idx, int): - raise TypeError(idx) - if idx >= len(self) : - raise IndexError - # call back into self.__getitem__ - examples = [self.minibatches_nowrap(self.fieldNames(), - minibatch_size=1, n_batches=1, offset=ii).next() - for ii in i] - # re-index the fields in each example by field instead of by example - field_values = [[] for blah in self.fieldNames()] - for e in examples: - for f,v in zip(field_values, e): - f.append(v) - #build them into a LookupList (a.ka. Example) - zz = zip(self.fieldNames(),field_values) - vst = [self.valuesVStack(fieldname,field_values) for fieldname,field_values in zz] - example = Example(self.fieldNames(), vst) - #return MinibatchDataSet(example, self.valuesVStack, self.valuesHStack) - # now returns a LookupList - return example - - # what in the world is i? 
- raise TypeError(i, type(i)) - - - """ - Enables the call dataset.subset[a:b:c] that will return a DataSet - around the examples returned by __getitem__(slice(a,b,c)) - - @SEE DataSet.__getsubset(self) - """ - subset = property(lambda s : s.__getsubset(),doc="returns a subset as a DataSet") - - - def __getsubset(self) : - """ - Enables the call data.subset[a:b:c], returns a DataSet. - Default implementation is a simple wrap around __getitem__() using MinibatchDataSet. - - @RETURN DataSet - @SEE DataSet.subset = property(lambda s : s.__getsubset()) - """ - _self = self - class GetSliceReturnsDataSet(object) : - def __getitem__(self,slice) : - return MinibatchDataSet(_self.__getitem__(slice)) - return GetSliceReturnsDataSet() - - - - def valuesHStack(self,fieldnames,fieldvalues): - """ - Return a value that corresponds to concatenating (horizontally) several field values. - This can be useful to merge some fields. The implementation of this operation is likely - to involve a copy of the original values. When the values are numpy arrays, the - result should be numpy.hstack(values). If it makes sense, this operation should - work as well when each value corresponds to multiple examples in a minibatch - e.g. if each value is a Ni-vector and a minibatch of length L is a LxNi matrix, - then the result should be a Lx(N1+N2+..) matrix equal to numpy.hstack(values). - The default is to use numpy.hstack for numpy.ndarray values, and a list - pointing to the original values for other data types. - """ - all_numpy=True - for value in fieldvalues: - if not type(value) is numpy.ndarray: - all_numpy=False - if all_numpy: - return numpy.hstack(fieldvalues) - # the default implementation of horizontal stacking is to put values in a list - return fieldvalues - - def valuesVStack(self,fieldname,values): - """ - @param fieldname: the name of the field from which the values were taken - @type fieldname: any type - - @param values: bits near the beginning or end of the dataset - @type values: list of minibatches (returned by minibatches_nowrap) - - @return: the concatenation (stacking) of the values - @rtype: something suitable as a minibatch field - """ - rval = [] - for v in values: - rval.extend(v) - return rval - - def __or__(self,other): - """ - dataset1 | dataset2 returns a dataset whose list of fields is the concatenation of the list of - fields of the argument datasets. This only works if they all have the same length. - """ - return HStackedDataSet([self,other]) - - def __and__(self,other): - """ - dataset1 & dataset2 is a dataset that concatenates the examples from the argument datasets - (and whose length is the sum of the length of the argument datasets). This only - works if they all have the same fields. - """ - return VStackedDataSet([self,other]) - -def hstack(datasets): - """ - hstack(dataset1,dataset2,...) returns dataset1 | datataset2 | ... - which is a dataset whose fields list is the concatenation of the fields - of the individual datasets. - """ - assert len(datasets)>0 - if len(datasets)==1: - return datasets[0] - return HStackedDataSet(datasets) - -def vstack(datasets): - """ - vstack(dataset1,dataset2,...) returns dataset1 & datataset2 & ... - which is a dataset which iterates first over the examples of dataset1, then - over those of dataset2, etc. - """ - assert len(datasets)>0 - if len(datasets)==1: - return datasets[0] - return VStackedDataSet(datasets) - -class FieldsSubsetDataSet(DataSet): - """ - A sub-class of L{DataSet} that selects a subset of the fields. 
- """ - def __init__(self,src,fieldnames): - self.src=src - self.fieldnames=fieldnames - assert src.hasFields(*fieldnames) - self.valuesHStack = src.valuesHStack - self.valuesVStack = src.valuesVStack - - def __len__(self): return len(self.src) - - def fieldNames(self): - return self.fieldnames - - def __iter__(self): - class FieldsSubsetIterator(object): - def __init__(self,ds): - self.ds=ds - self.src_iter=ds.src.__iter__() - self.example=None - def __iter__(self): return self - def next(self): - complete_example = self.src_iter.next() - if self.example: - self.example._values=[complete_example[field] - for field in self.ds.fieldnames] - else: - self.example=Example(self.ds.fieldnames, - [complete_example[field] for field in self.ds.fieldnames]) - return self.example - return FieldsSubsetIterator(self) - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - assert self.hasFields(*fieldnames) - return self.src.minibatches_nowrap(fieldnames,minibatch_size,n_batches,offset) - def dontuse__getitem__(self,i): - return FieldsSubsetDataSet(self.src[i],self.fieldnames) - -class RenamedFieldsDataSet(DataSet): - """ - A sub-class of L{DataSet} that selects and renames a subset of the fields. - """ - def __init__(self,src,src_fieldnames,new_fieldnames): - self.src=src - self.src_fieldnames=src_fieldnames - self.new_fieldnames=new_fieldnames - assert src.hasFields(*src_fieldnames) - assert len(src_fieldnames)==len(new_fieldnames) - self.valuesHStack = src.valuesHStack - self.valuesVStack = src.valuesVStack - self.lookup_fields = Example(new_fieldnames,src_fieldnames) - - def __len__(self): return len(self.src) - - def fieldNames(self): - return self.new_fieldnames - - def __iter__(self): - class FieldsSubsetIterator(object): - def __init__(self,ds): - self.ds=ds - self.src_iter=ds.src.__iter__() - self.example=None - def __iter__(self): return self - def next(self): - complete_example = self.src_iter.next() - if self.example: - self.example._values=[complete_example[field] - for field in self.ds.src_fieldnames] - else: - self.example=Example(self.ds.new_fieldnames, - [complete_example[field] - for field in self.ds.src_fieldnames]) - return self.example - return FieldsSubsetIterator(self) - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - assert self.hasFields(*fieldnames) - cursor = Example(fieldnames,[0]*len(fieldnames)) - for batch in self.src.minibatches_nowrap([self.lookup_fields[f] for f in fieldnames],minibatch_size,n_batches,offset): - cursor._values=batch._values - yield cursor - - def __getitem__(self,i): -# return FieldsSubsetDataSet(self.src[i],self.new_fieldnames) - complete_example = self.src[i] - return Example(self.new_fieldnames, - [complete_example[field] - for field in self.src_fieldnames]) - - - -class DataSetFields(Example): - """ - Although a L{DataSet} iterates over examples (like rows of a matrix), an associated - DataSetFields iterates over fields (like columns of a matrix), and can be understood - as a transpose of the associated dataset. - - To iterate over fields, one can do - * for fields in dataset.fields() - * for fields in dataset(field1,field2,...).fields() to select a subset of fields - * for fields in dataset.fields(field1,field2,...) to select a subset of fields - and each of these fields is iterable over the examples: - * for field_examples in dataset.fields(): - for example_value in field_examples: - ... 
- but when the dataset is a stream (unbounded length), it is not recommended to do - such things because the underlying dataset may refuse to access the different fields in - an unsynchronized ways. Hence the fields() method is illegal for streams, by default. - The result of fields() is a DataSetFields object, which iterates over fields, - and whose elements are iterable over examples. A DataSetFields object can - be turned back into a DataSet with its examples() method: - dataset2 = dataset1.fields().examples() - and dataset2 should behave exactly like dataset1 (in fact by default dataset2==dataset1). - - DataSetFields can be concatenated vertically or horizontally. To be consistent with - the syntax used for DataSets, the | concatenates the fields and the & concatenates - the examples. - """ - def __init__(self,dataset,fieldnames): - original_dataset=dataset - if not fieldnames: - fieldnames=dataset.fieldNames() - elif not list(fieldnames)==list(dataset.fieldNames()): - #we must cast to list, othersize('x','y')!=['x','y'] - dataset = FieldsSubsetDataSet(dataset,fieldnames) - assert dataset.hasFields(*fieldnames) - self.dataset=dataset - - if isinstance(dataset,MinibatchDataSet): - Example.__init__(self,fieldnames,list(dataset._fields)) - elif isinstance(original_dataset,MinibatchDataSet): - Example.__init__(self,fieldnames, - [original_dataset._fields[field] - for field in fieldnames]) - else: - minibatch_iterator = dataset.minibatches(fieldnames, - minibatch_size=len(dataset), - n_batches=1) - minibatch=minibatch_iterator.next() - Example.__init__(self,fieldnames,minibatch) - - def examples(self): - return self.dataset - - def __or__(self,other): - """ - fields1 | fields2 is a DataSetFields that whose list of examples is the concatenation - of the list of examples of DataSetFields fields1 and fields2. - """ - return (self.examples() + other.examples()).fields() - - def __and__(self,other): - """ - fields1 + fields2 is a DataSetFields that whose list of fields is the concatenation - of the fields of DataSetFields fields1 and fields2. - """ - return (self.examples() | other.examples()).fields() - - -class MinibatchDataSet(DataSet): - """ - Turn a L{Example} of same-length (iterable) fields into an example-iterable dataset. - Each element of the lookup-list should be an iterable and sliceable, all of the same length. - """ - def __init__(self,fields_lookuplist,values_vstack=DataSet().valuesVStack, - values_hstack=DataSet().valuesHStack): - """ - The user can (and generally should) also provide values_vstack(fieldname,fieldvalues) - and a values_hstack(fieldnames,fieldvalues) functions behaving with the same - semantics as the DataSet methods of the same name (but without the self argument). 
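# Minimal sketch of building a MinibatchDataSet directly from same-length field values,
# as described in the class docstring above.  The arrays and field names are hypothetical.
import numpy
x = numpy.random.rand(5, 3)
y = numpy.random.rand(5, 1)
mbds = MinibatchDataSet(Example(['x', 'y'], [x, y]))  # every field must have length 5
assert len(mbds) == 5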
- """ - - self._fields=fields_lookuplist - assert len(fields_lookuplist)>0 - self.length=len(fields_lookuplist[0]) - for field in fields_lookuplist[1:]: - if self.length != len(field) : - print 'self.length = ',self.length - print 'len(field) = ', len(field) - print 'self._fields.keys() = ', self._fields.keys() - print 'field=',field - print 'fields_lookuplist=', fields_lookuplist - assert self.length==len(field) - self.valuesVStack=values_vstack - self.valuesHStack=values_hstack - - def __len__(self): - return self.length - - def dontuse__getitem__(self,i): - if type(i) in (slice,list): - return DataSetFields(MinibatchDataSet( - Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames()) - if type(i) is int: - return Example(self._fields.keys(),[field[i] for field in self._fields]) - if self.hasFields(i): - return self._fields[i] - assert i in self.__dict__ # else it means we are trying to access a non-existing property - return self.__dict__[i] - - def fieldNames(self): - return self._fields.keys() - - def hasFields(self,*fieldnames): - for fieldname in fieldnames: - if fieldname not in self._fields.keys(): - return False - return True - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - #@TODO bug somewhere here, fieldnames doesnt seem to be well handled - class Iterator(object): - def __init__(self,ds,fieldnames): - # tbm: added two next lines to handle fieldnames - if fieldnames is None: fieldnames = ds._fields.keys() - self.fieldnames = fieldnames - - self.ds=ds - self.next_example=offset - assert minibatch_size >= 0 - if offset+minibatch_size > ds.length: - raise NotImplementedError() - def __iter__(self): - return self - def next(self): - upper = self.next_example+minibatch_size - if upper > len(self.ds) : - raise StopIteration() - assert upper<=len(self.ds) # instead of self.ds.length - #minibatch = Example(self.ds._fields.keys(), - # [field[self.next_example:upper] - # for field in self.ds._fields]) - # tbm: modif to use fieldnames - values = [] - for f in self.fieldnames : - #print 'we have field',f,'in fieldnames' - values.append( self.ds._fields[f][self.next_example:upper] ) - minibatch = Example(self.fieldnames,values) - #print minibatch - self.next_example+=minibatch_size - return minibatch - - # tbm: added fieldnames to handle subset of fieldnames - return Iterator(self,fieldnames) - -class HStackedDataSet(DataSet): - """ - A L{DataSet} that wraps several datasets and shows a view that includes all their fields, - i.e. whose list of fields is the concatenation of their lists of fields. - - If a field name is found in more than one of the datasets, then either an error is - raised or the fields are renamed (either by prefixing the __name__ attribute - of the dataset + ".", if it exists, or by suffixing the dataset index in the argument list). - - @todo: automatically detect a chain of stacked datasets due to A | B | C | D ... - """ - def __init__(self,datasets,accept_nonunique_names=False,description=None,field_types=None): - DataSet.__init__(self,description,field_types) - self.datasets=datasets - self.accept_nonunique_names=accept_nonunique_names - self.fieldname2dataset={} - - def rename_field(fieldname,dataset,i): - if hasattr(dataset,"__name__"): - return dataset.__name__ + "." 
+ fieldname - return fieldname+"."+str(i) - - # make sure all datasets have the same length and unique field names - self.length=None - names_to_change=[] - for i in xrange(len(datasets)): - dataset = datasets[i] - length=len(dataset) - if self.length: - assert self.length==length - else: - self.length=length - for fieldname in dataset.fieldNames(): - if fieldname in self.fieldname2dataset: # name conflict! - if accept_nonunique_names: - fieldname=rename_field(fieldname,dataset,i) - names2change.append((fieldname,i)) - else: - raise ValueError("Incompatible datasets: non-unique field name = "+fieldname) - self.fieldname2dataset[fieldname]=i - for fieldname,i in names_to_change: - del self.fieldname2dataset[fieldname] - self.fieldname2dataset[rename_field(fieldname,self.datasets[i],i)]=i - - def __len__(self): - return len(self.datasets[0]) - - def hasFields(self,*fieldnames): - for fieldname in fieldnames: - if not fieldname in self.fieldname2dataset: - return False - return True - - def fieldNames(self): - return self.fieldname2dataset.keys() - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - - class HStackedIterator(object): - def __init__(self,hsds,iterators): - self.hsds=hsds - self.iterators=iterators - def __iter__(self): - return self - def next(self): - # concatenate all the fields of the minibatches - l=Example() - for iter in self.iterators: - l.append_lookuplist(iter.next()) - return l - - assert self.hasFields(*fieldnames) - # find out which underlying datasets are necessary to service the required fields - # and construct corresponding minibatch iterators - if fieldnames and fieldnames!=self.fieldNames(): - datasets=set([]) - fields_in_dataset=dict([(dataset,[]) for dataset in datasets]) - for fieldname in fieldnames: - dataset=self.datasets[self.fieldname2dataset[fieldname]] - datasets.add(dataset) - fields_in_dataset[dataset].append(fieldname) - datasets=list(datasets) - iterators=[dataset.minibatches(fields_in_dataset[dataset],minibatch_size,n_batches,offset) - for dataset in datasets] - else: - datasets=self.datasets - iterators=[dataset.minibatches(None,minibatch_size,n_batches,offset) for dataset in datasets] - return HStackedIterator(self,iterators) - - - def untested_valuesVStack(self,fieldname,fieldvalues): - return self.datasets[self.fieldname2dataset[fieldname]].valuesVStack(fieldname,fieldvalues) - - def untested_valuesHStack(self,fieldnames,fieldvalues): - """ - We will use the sub-dataset associated with the first fieldname in the fieldnames list - to do the work, hoping that it can cope with the other values (i.e. won't care - about the incompatible fieldnames). Hence this heuristic will always work if - all the fieldnames are of the same sub-dataset. - """ - return self.datasets[self.fieldname2dataset[fieldnames[0]]].valuesHStack(fieldnames,fieldvalues) - -class VStackedDataSet(DataSet): - """ - A L{DataSet} that wraps several datasets and shows a view that includes all their examples, - in the order provided. This clearly assumes that they all have the same field names - and all (except possibly the last one) are of finite length. - - @todo: automatically detect a chain of stacked datasets due to A + B + C + D ... 
- """ - def __init__(self,datasets): - self.datasets=datasets - self.length=0 - self.index2dataset={} - assert len(datasets)>0 - fieldnames = datasets[-1].fieldNames() - self.datasets_start_row=[] - # We use this map from row index to dataset index for constant-time random access of examples, - # to avoid having to search for the appropriate dataset each time and slice is asked for. - for dataset,k in enumerate(datasets[0:-1]): - assert dataset.is_unbounded() # All VStacked datasets (except possibly the last) must be bounded (have a length). - L=len(dataset) - for i in xrange(L): - self.index2dataset[self.length+i]=k - self.datasets_start_row.append(self.length) - self.length+=L - assert dataset.fieldNames()==fieldnames - self.datasets_start_row.append(self.length) - self.length+=len(datasets[-1]) - # If length is very large, we should use a more memory-efficient mechanism - # that does not store all indices - if self.length>1000000: - # 1 million entries would require about 60 meg for the index2dataset map - # TODO - print "A more efficient mechanism for index2dataset should be implemented" - - def __len__(self): - return self.length - - def fieldNames(self): - return self.datasets[0].fieldNames() - - def hasFields(self,*fieldnames): - return self.datasets[0].hasFields(*fieldnames) - - def locate_row(self,row): - """Return (dataset_index, row_within_dataset) for global row number""" - dataset_index = self.index2dataset[row] - row_within_dataset = self.datasets_start_row[dataset_index] - return dataset_index, row_within_dataset - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - - class VStackedIterator(object): - def __init__(self,vsds): - self.vsds=vsds - self.next_row=offset - self.next_dataset_index,self.next_dataset_row=self.vsds.locate_row(offset) - self.current_iterator,self.n_left_at_the_end_of_ds,self.n_left_in_mb= \ - self.next_iterator(vsds.datasets[0],offset,n_batches) - - def next_iterator(self,dataset,starting_offset,batches_left): - L=len(dataset) - ds_nbatches = (L-starting_offset)/minibatch_size - if batches_left is not None: - ds_nbatches = max(batches_left,ds_nbatches) - if minibatch_size>L: - ds_minibatch_size=L - n_left_in_mb=minibatch_size-L - ds_nbatches=1 - else: - n_left_in_mb=0 - return dataset.minibatches(fieldnames,minibatch_size,ds_nbatches,starting_offset), \ - L-(starting_offset+ds_nbatches*minibatch_size), n_left_in_mb - - def move_to_next_dataset(self): - if self.n_left_at_the_end_of_ds>0: - self.current_iterator,self.n_left_at_the_end_of_ds,self.n_left_in_mb= \ - self.next_iterator(vsds.datasets[self.next_dataset_index], - self.n_left_at_the_end_of_ds,1) - else: - self.next_dataset_index +=1 - if self.next_dataset_index==len(self.vsds.datasets): - self.next_dataset_index = 0 - self.current_iterator,self.n_left_at_the_end_of_ds,self.n_left_in_mb= \ - self.next_iterator(vsds.datasets[self.next_dataset_index],starting_offset,n_batches) - - def __iter__(self): - return self - - def next(self): - dataset=self.vsds.datasets[self.next_dataset_index] - mb = self.next_iterator.next() - if self.n_left_in_mb: - extra_mb = [] - while self.n_left_in_mb>0: - self.move_to_next_dataset() - extra_mb.append(self.next_iterator.next()) - mb = Example(fieldnames, - [dataset.valuesVStack(name, - [mb[name]]+[b[name] for b in extra_mb]) - for name in fieldnames]) - - self.next_row+=minibatch_size - self.next_dataset_row+=minibatch_size - if self.next_row+minibatch_size>len(dataset): - self.move_to_next_dataset() - return examples - return 
VStackedIterator(self) - -class ArrayFieldsDataSet(DataSet): - """ - Virtual super-class of datasets whose field values are numpy array, - thus defining valuesHStack and valuesVStack for sub-classes. - """ - def __init__(self,description=None,field_types=None): - DataSet.__init__(self,description,field_types) - def untested_valuesHStack(self,fieldnames,fieldvalues): - """Concatenate field values horizontally, e.g. two vectors - become a longer vector, two matrices become a wider matrix, etc.""" - return numpy.hstack(fieldvalues) - def untested_valuesVStack(self,fieldname,values): - """Concatenate field values vertically, e.g. two vectors - become a two-row matrix, two matrices become a longer matrix, etc.""" - return numpy.vstack(values) - - - -class NArraysDataSet(ArrayFieldsDataSet) : - """ - An NArraysDataSet stores fields that are numpy tensor, whose first axis - iterates over examples. It's a generalization of ArrayDataSet. - """ - #@TODO not completely implemented yet - def __init__(self, data_arrays, fieldnames, **kwargs) : - """ - Construct an NArraysDataSet from a list of numpy tensor (data_arrays) and a list - of fieldnames. The number of arrays must be the same as the number of - fieldnames. Each set of numpy tensor must have the same first dimension (first - axis) corresponding to the number of examples. - - Every tensor is treated as a numpy array (using numpy.asarray) - """ - ArrayFieldsDataSet.__init__(self,**kwargs) - assert len(data_arrays) == len(fieldnames) - assert len(fieldnames) > 0 - ndarrays = [numpy.asarray(a) for a in data_arrays] - lens = [a.shape[0] for a in ndarrays] - num_examples = lens[0] #they must all be equal anyway - self._fieldnames = fieldnames - for k in ndarrays : - assert k.shape[0] == num_examples - self._datas = ndarrays - # create dict - self.map_field_idx = dict() - for k in range(len(fieldnames)): - self.map_field_idx[fieldnames[k]] = k - - - def __len__(self) : - """ - Length of the dataset is based on the first array = data_arrays[0], using its shape - """ - return self._datas[0].shape[0] - - def fieldNames(self) : - """ - Returns the fieldnames as set in self.__init__ - """ - return self._fieldnames - - def field_pos(self,fieldname) : - """ - Returns the index of a given fieldname. Fieldname must exists! see fieldNames(). - """ - return self.map_field_idx[fieldname] - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - cursor = Example(fieldnames,[0]*len(fieldnames)) - fieldnames = self.fieldNames() if fieldnames is None else fieldnames - for n in xrange(n_batches): - if offset == len(self): - break - for f in range(len(cursor._names)) : - idx = self.field_pos(cursor._names[f]) - sub_data = self._datas[idx][offset : offset+minibatch_size] - cursor._values[f] = sub_data - offset += len(sub_data) #can be less than minibatch_size at end - yield cursor - - #return ArrayDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) - - - - -class ArrayDataSet(ArrayFieldsDataSet): - """ - An ArrayDataSet stores the fields as groups of columns in a numpy tensor, - whose first axis iterates over examples, second axis determines fields. - If the underlying array is N-dimensional (has N axes), then the field - values are (N-2)-dimensional objects (i.e. ordinary numbers if N=2). - """ - - def __init__(self, data_array, fields_columns, **kwargs): - """ - Construct an ArrayDataSet from the underlying numpy array (data) and - a map (fields_columns) from fieldnames to field columns. 
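# Sketch of the NArraysDataSet constructor described above: one numpy array per field,
# all sharing the same first (example) dimension.  The arrays and field names are
# hypothetical.
import numpy
images = numpy.random.rand(100, 784)
labels = numpy.random.randint(0, 10, size=(100,))
nds = NArraysDataSet([images, labels], ['image', 'label'])
assert len(nds) == 100
assert nds.fieldNames() == ['image', 'label']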
The columns of a field are specified - using the standard arguments for indexing/slicing: integer for a column index, - slice for an interval of columns (with possible stride), or iterable of column indices. - """ - ArrayFieldsDataSet.__init__(self, **kwargs) - self.data=data_array - self.fields_columns=fields_columns - - # check consistency and complete slices definitions - for fieldname, fieldcolumns in self.fields_columns.items(): - if type(fieldcolumns) is int: - assert fieldcolumns>=0 and fieldcolumns=0 and i=self.l: - raise StopIteration - sub_data = self.dataset.data[self.current] - self.minibatch._values = [sub_data[c] for c in self.columns] - - self.current+=1 - return self.minibatch - - return ArrayDataSetIteratorIter(self,self.fieldNames()) - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - cursor = Example(fieldnames,[0]*len(fieldnames)) - fieldnames = self.fieldNames() if fieldnames is None else fieldnames - if n_batches == None: - n_batches = (len(self) - offset) / minibatch_size - for n in xrange(n_batches): - if offset == len(self): - break - sub_data = self.data[offset : offset+minibatch_size] - offset += len(sub_data) #can be less than minibatch_size at end - cursor._values = [sub_data[:,self.fields_columns[f]] for f in cursor._names] - yield cursor - - #return ArrayDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) - - -class CachedDataSet(DataSet): - """ - Wrap a L{DataSet} whose values are computationally expensive to obtain - (e.g. because they involve some computation, or disk access), - so that repeated accesses to the same example are done cheaply, - by caching every example value that has been accessed at least once. - - Optionally, for finite-length dataset, all the values can be computed - (and cached) upon construction of the CachedDataSet, rather at the - first access. - - @todo: when cache_all_upon_construction create mini-batches that are as - large as possible but not so large as to fill up memory. - - @todo: add disk-buffering capability, so that when the cache becomes too - big for memory, we cache things on disk, trying to keep in memory only - the record most likely to be accessed next. - """ - def __init__(self,source_dataset,cache_all_upon_construction=False): - self.source_dataset=source_dataset - self.cache_all_upon_construction=cache_all_upon_construction - self.cached_examples = [] - if cache_all_upon_construction: - # this potentially brings all the source examples - # into memory at once, which may be too much - # the work could possibly be done by minibatches - # that are as large as possible but no more than what memory allows. 
- # - # field_values is supposed to be an DataSetFields, that inherits from LookupList - #fields_values = source_dataset.minibatches(minibatch_size=len(source_dataset)).__iter__().next() - fields_values = DataSetFields(source_dataset,None) - assert all([len(self)==len(field_values) for field_values in fields_values]) - for example in fields_values.examples(): - self.cached_examples.append(copy.copy(example)) - - self.fieldNames = source_dataset.fieldNames - self.hasFields = source_dataset.hasFields - self.valuesHStack = source_dataset.valuesHStack - self.valuesVStack = source_dataset.valuesVStack - - def __len__(self): - return len(self.source_dataset) - - def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): - class CacheIterator(object): - def __init__(self,dataset): - self.dataset=dataset - self.current=offset - self.all_fields = self.dataset.fieldNames()==fieldnames - self.n_batches = n_batches - self.batch_counter = 0 - def __iter__(self): return self - def next(self): - self.batch_counter += 1 - if self.n_batches and self.batch_counter > self.n_batches : - raise StopIteration() - upper = self.current+minibatch_size - if upper > len(self.dataset.source_dataset): - raise StopIteration() - cache_len = len(self.dataset.cached_examples) - if upper>cache_len: # whole minibatch is not already in cache - # cache everything from current length to upper - #for example in self.dataset.source_dataset[cache_len:upper]: - for example in self.dataset.source_dataset.subset[cache_len:upper]: - self.dataset.cached_examples.append(example) - all_fields_minibatch = Example(self.dataset.fieldNames(), - zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size])) - - self.current+=minibatch_size - if self.all_fields: - return all_fields_minibatch - return Example(fieldnames,[all_fields_minibatch[name] for name in fieldnames]) - return CacheIterator(self) - - def dontuse__getitem__(self,i): - if type(i)==int and len(self.cached_examples)>i: - return self.cached_examples[i] - else: - return self.source_dataset[i] - - def __iter__(self): - class CacheIteratorIter(object): - def __init__(self,dataset): - self.dataset=dataset - self.l = len(dataset) - self.current = 0 - self.fieldnames = self.dataset.fieldNames() - self.example = Example(self.fieldnames,[0]*len(self.fieldnames)) - def __iter__(self): return self - def next(self): - if self.current>=self.l: - raise StopIteration - cache_len = len(self.dataset.cached_examples) - if self.current>=cache_len: # whole minibatch is not already in cache - # cache everything from current length to upper - self.dataset.cached_examples.append( - self.dataset.source_dataset[self.current]) - self.example._values = self.dataset.cached_examples[self.current] - self.current+=1 - return self.example - - return CacheIteratorIter(self) - -class ApplyFunctionDataSet(DataSet): - """ - A L{DataSet} that contains as fields the results of applying a - given function example-wise or minibatch-wise to all the fields of - an input dataset. The output of the function should be an iterable - (e.g. a list or a LookupList) over the resulting values. - - The function take as input the fields of the dataset, not the examples. - - In minibatch mode, the function is expected to work on minibatches - (takes a minibatch in input and returns a minibatch in output). More - precisely, it means that each element of the input or output list - should be iterable and indexable over the individual example values - (typically these elements will be numpy arrays). 
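# Sketch combining the two wrappers above: ApplyFunctionDataSet applies a function to the
# fields (here in minibatch mode, so it receives and returns arrays), and CachedDataSet
# memoizes the results so that repeated access is cheap.  base_ds is a hypothetical
# dataset whose two fields are passed to the function in order.
def normalize(x, y):                        # receives one minibatch per input field
    return (x - x.mean(axis=0), y)          # returns one iterable per output field
processed = ApplyFunctionDataSet(base_ds, normalize, ['x_norm', 'y'], minibatch_mode=True)
cached = CachedDataSet(processed)           # values are computed once, then cached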
All of the elements - in the input and output lists should have the same length, which is - the length of the minibatch. - - The function is applied each time an example or a minibatch is accessed. - To avoid re-doing computation, wrap this dataset inside a CachedDataSet. - - If the values_{h,v}stack functions are not provided, then - the input_dataset.values{H,V}Stack functions are used by default. - - """ - - def __init__(self,input_dataset,function,output_names,minibatch_mode=True, - values_hstack=None,values_vstack=None, - description=None,fieldtypes=None): - """ - Constructor takes an input dataset that has as many fields as the function - expects as inputs. The resulting dataset has as many fields as the function - produces as outputs, and that should correspond to the number of output names - (provided in a list). - - Note that the expected semantics of the function differs in minibatch mode - (it takes minibatches of inputs and produces minibatches of outputs, as - documented in the class comment). - - TBM: are fieldtypes the old field types (from input_dataset) or the new ones - (for the new dataset created)? - """ - self.input_dataset=input_dataset - self.function=function - self.output_names=output_names - #print 'self.output_names in afds:', self.output_names - #print 'length in afds:', len(self.output_names) - self.minibatch_mode=minibatch_mode - DataSet.__init__(self,description,fieldtypes) - self.valuesHStack = values_hstack if values_hstack else input_dataset.valuesHStack - self.valuesVStack = values_vstack if values_vstack else input_dataset.valuesVStack - - def __len__(self): - return len(self.input_dataset) - - def fieldNames(self): - return self.output_names - - def minibatches_nowrap(self, fieldnames, *args, **kwargs): - all_input_fieldNames = self.input_dataset.fieldNames() - mbnw = self.input_dataset.minibatches_nowrap - - for input_fields in mbnw(all_input_fieldNames, *args, **kwargs): - if self.minibatch_mode: - all_output_fields = self.function(*input_fields) - else: - input_examples = zip(*input_fields) #makes so that [i] means example i - output_examples = [self.function(*input_example) - for input_example in input_examples] - all_output_fields = zip(*output_examples) - - #print 'output_names=', self.output_names - #print 'all_output_fields', all_output_fields - #print 'len(all_output_fields)=', len(all_output_fields) - all_outputs = Example(self.output_names, all_output_fields) - if fieldnames==self.output_names: - rval = all_outputs - else: - rval = Example(fieldnames,[all_outputs[name] for name in fieldnames]) - #print 'rval', rval - #print '--------' - yield rval - - def untested__iter__(self): # only implemented for increased efficiency - class ApplyFunctionSingleExampleIterator(object): - def __init__(self,output_dataset): - self.current=0 - self.output_dataset=output_dataset - self.input_iterator=output_dataset.input_dataset.__iter__() - def __iter__(self): return self - def next(self): - if self.output_dataset.minibatch_mode: - function_inputs = [[input] for input in self.input_iterator.next()] - outputs = self.output_dataset.function(*function_inputs) - assert all([hasattr(output,'__iter__') for output in outputs]) - function_outputs = [output[0] for output in outputs] - else: - function_inputs = self.input_iterator.next() - function_outputs = self.output_dataset.function(*function_inputs) - return Example(self.output_dataset.output_names,function_outputs) - return ApplyFunctionSingleExampleIterator(self) - -def 
supervised_learning_dataset(src_dataset,input_fields,target_fields,weight_field=None): - """ - Wraps an arbitrary L{DataSet} into one for supervised learning tasks - by forcing the user to define a set of fields as the 'input' field - and a set of fields as the 'target' field. Optionally, a single - weight_field can also be defined. - """ - args = ((input_fields,'input'),(output_fields,'target')) - if weight_field: args+=(([weight_field],'weight')) - return src_dataset.merge_fields(*args) - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c datasets/MNIST.py --- a/datasets/MNIST.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -""" -Various routines to load/access MNIST data. -""" -from __future__ import absolute_import - -import os -import numpy - -from ..amat import AMat -from .config import data_root - -def head(n=10, path=None): - """Load the first MNIST examples. - - Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the - 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] - is the label of the i'th row of x. - - """ - path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path - - dat = AMat(path=path, head=n) - - try: - assert dat.input.shape[0] == n - assert dat.target.shape[0] == n - except Exception , e: - raise Exception("failed to read MNIST data", (dat, e)) - - return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) - -def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): - all_x, all_targ = head(ntrain+nvalid+ntest, path=path) - - train = all_x[0:ntrain], all_targ[0:ntrain] - valid = all_x[ntrain:ntrain+nvalid], all_targ[ntrain:ntrain+nvalid] - test = all_x[ntrain+nvalid:ntrain+nvalid+ntest], all_targ[ntrain+nvalid:ntrain+nvalid+ntest] - - return train, valid, test - -def all(path=None): - return head(n=None, path=path) - - diff -r 27b1344a57b1 -r 8fff4bc26f4c datasets/__init__.py --- a/datasets/__init__.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -from dataset import dataset, Dataset diff -r 27b1344a57b1 -r 8fff4bc26f4c datasets/config.py --- a/datasets/config.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -"""Configuration options for datasets - - -Especially, the locations of data files. -""" - -import os -def env_get(key, default): - return default if os.getenv(key) is None else os.getenv(key) - -def data_root(): - return env_get('PYLEARN_DATA_ROOT', '/u/bergstrj/pub/data/') - diff -r 27b1344a57b1 -r 8fff4bc26f4c datasets/dataset.py --- a/datasets/dataset.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -"""The dataset-from-descriptor mechanism.""" - -_factory = {} - -def add_dataset_factory(tok0, fn): - """Add `fn` as the handler for descriptors whose first token is `tok0`. - - :returns: None - - """ - if tok0 in _factory: - raise Exception('Identifier already in use:', tok0) - else: - _factory[tok0] = fn - -def dataset_factory(tok0): - """Register a function as the handler for a given kind of dataset, identified by `tok0`. - - When someone calls dataset_from_descr('kind_of_dataset option1 option2, etc.', approx=1), - then the handler registered for 'kind_of_dataset' will be called with the same arguments as - dataset_from_descr. - - .. code-block:: python - - @dataset_factory('MNIST') - def mnist_related_dataset(descr, **kwargs): - ... 
- - :returns: `dectorator` - """ - def decorator(fn): - add_dataset_factory(tok0, fn) - return fn - return decorator - -def dataset(descr, **kwargs): - """Return the dataset described by `descr`. - - :param descr: a dataset identifier - :type descr: str - :returns: `Dataset` - - """ - tok0 = descr.split()[0] - fn = _factory[tok0] - return fn(descr, **kwargs) - - -class Dataset(object): - """Dataset is a generic container for pylearn datasets. - - It is not intended to put any restriction whatsoever on its contents. - - It is intended to encourage certain conventions, described below. Conventions should arise - naturally among datasets in PyLearn. When a few datasets adhere to a new convention, then - describe it here and make it more official. - - If no particular convention applies. Create your own object to store the dataset, and - assign it to the `data` attribute. - """ - data = None - - """ - SIMPLE REGRESSION / CLASSIFICATION - ---------------------------------- - - In this setting, you are aiming to do vector classification or vector regression - where your train, valid and test sets fit in memory. - The convention is to put your data into numpy ndarray instances. Put training data in the - `train` attribute, validation data in the `valid` attribute and test data in the `test - attribute`. - Each of those attributes should be an instance that defines at least two attributes: `x` for the - input matrix and `y` for the target matrix. The `x` ndarray should be one example per - leading index (row for matrices). - The `y` ndarray should be one target per leading index (entry for vectors, row for matrices). - If `y` is a classification target, than it should be a vector with numpy dtype 'int32'. - - If there are weights associated with different examples, then create a 'weights' attribute whose - value is a vector with one floating-point value (typically double-precision) per example. - - If the task is classification, then the classes should be mapped to the integers - 0,1,...,N-1. - The number of classes (here, N) should be stored in the `n_classes` attribute. - - """ - train = None #instance with .x, .y - - valid = None #instance with .x, .y - - test = None #instance with .x, .y - - n_classes = None #int - - """ - WHEN INPUTS ARE FIXED-SIZE GREYSCALE IMAGES - ------------------------------------------- - - In this setting we typically encode images as vectors, by enumerating the pixel values in - left-to-right, top-to-bottom order. Pixel values should be in floating-point, and - normalized between 0 and 1. - - The shape of the images should be recorded in the `img_shape` attribute as a tuple (rows, - cols). - - """ - - img_shape = None # (rows, cols) - - - """ - TIMESERIES - ---------- - - When dealing with examples which are themselves timeseries, put each example timeseries in a - tensor and make a list of them. 
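A self-contained sketch of the descriptor/factory registry pattern implemented by add_dataset_factory, dataset_factory and dataset above; the token and handler below are invented for illustration:

    _registry = {}

    def register(tok0):
        # decorator form of the factory registration: the first token of a
        # descriptor string selects the handler that builds the dataset
        def decorator(fn):
            if tok0 in _registry:
                raise KeyError('identifier already in use: %s' % tok0)
            _registry[tok0] = fn
            return fn
        return decorator

    def build(descr, **kwargs):
        return _registry[descr.split()[0]](descr, **kwargs)

    @register('TOY')
    def toy_dataset(descr, **kwargs):
        return ('built', descr, kwargs)

    print(build('TOY 1k', approx=1))    # ('built', 'TOY 1k', {'approx': 1})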
Generally use tensors, and resort to lists or arrays - wherever different - """ - diff -r 27b1344a57b1 -r 8fff4bc26f4c datasets/shapeset1.py --- a/datasets/shapeset1.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -""" -Routines to load/access Shapeset1 -""" - -from __future__ import absolute_import - -import os -import numpy - -from ..amat import AMat -from .config import data_root - -def _head(path, n): - dat = AMat(path=path, head=n) - - try: - assert dat.input.shape[0] == n - assert dat.target.shape[0] == n - except Exception , e: - raise Exception("failed to read %i lines from file %s" % (n, path)) - - return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) - - -def head_train(n=10000): - """Load the first Shapeset1 training examples. - - Returns two matrices: x, y. x has N rows of 1024 columns. Each row of x represents the - 32x32 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] - is the label of the i'th row of x. - - """ - path = os.path.join(data_root(), 'shapeset1','shapeset1_1cspo_2_3.10000.train.shape.amat') - return _head(path, n) - -def head_valid(n=5000): - """Load the first Shapeset1 validation examples. - - Returns two matrices: x, y. x has N rows of 1024 columns. Each row of x represents the - 32x32 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] - is the label of the i'th row of x. - - """ - path = os.path.join(data_root(), 'shapeset1','shapeset1_1cspo_2_3.5000.valid.shape.amat') - return _head(path, n) - -def head_test(n=5000): - """Load the first Shapeset1 testing examples. - - Returns two matrices: x, y. x has N rows of 1024 columns. Each row of x represents the - 32x32 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] - is the label of the i'th row of x. - - """ - path = os.path.join(data_root(), 'shapeset1','shapeset1_1cspo_2_3.5000.test.shape.amat') - return _head(path, n) - -def train_valid_test(ntrain=10000, nvalid=5000, ntest=5000): - return head_train(n=ntrain), head_valid(n=nvalid), head_test(n=ntest) - - diff -r 27b1344a57b1 -r 8fff4bc26f4c datasets/smallNorb.py --- a/datasets/smallNorb.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -import os -import numpy -from ..filetensor import read -from .config import data_root - -#Path = '/u/bergstrj/pub/data/smallnorb' -#Path = '/home/fringant2/lisa/louradoj/data/smallnorb' -#Path = '/home/louradou/data/norb' - -class Paths(object): - """File-related operations on smallNorb - """ - def __init__(self): - smallnorb = [data_root(), 'smallnorb'] - self.train_dat = os.path.join(*\ - smallnorb + ['smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat']) - self.test_dat = os.path.join(*\ - smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat']) - self.train_cat = os.path.join(*\ - smallnorb + ['smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat']) - self.test_cat = os.path.join(*\ - smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat']) - self.train_info = os.path.join(*\ - smallnorb + ['smallnorb-5x46789x9x18x6x2x96x96-training-info.mat']) - self.test_info = os.path.join(*\ - smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat']) - - def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='uint8'): - """ Load the smallNorb data into numpy matrices. 
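A minimal numpy illustration of the fixed-size greyscale image convention from the Dataset docstring above (pixels flattened in raster order, values in [0, 1], shape recorded separately as (rows, cols)); the sizes here are arbitrary:

    import numpy

    rows, cols = 28, 28                      # the img_shape convention: (rows, cols)
    x = numpy.random.rand(5, rows * cols)    # five images, one flattened row each
    first_image = x[0].reshape(rows, cols)   # back to a 2-D picture for display
    print(first_image.shape)                 # (28, 28)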
- - normalize_pixels True will divide the values by 255, which makes sense in conjunction - with dtype=float32 or dtype=float64. - - """ - def downsample(dataset): - return dataset[:, 0, ::downsample_amt, ::downsample_amt] - - samples = downsample(read(open(self.train_dat))) - samples = numpy.vstack((samples, downsample(read(open(self.test_dat))))) - samples = numpy.asarray(samples, dtype=dtype) - if normalize_pixels: - samples *= (1.0 / 255.0) - - labels = read(open(self.train_cat)) - labels = numpy.hstack((labels, read(open(self.test_cat)))) - - infos = read(open(self.train_info)) - infos = numpy.vstack((infos, read(open(self.test_info)))) - - return samples, labels, infos - -def smallnorb_iid(ntrain=29160, nvalid=9720, ntest=9720, dtype='float64', normalize_pixels=True): - """Variation of the smallNorb task in which we randomly shuffle all the object instances - together before dividing into train/valid/test. - - The default train/valid/test sizes correspond to 60/20/20 split of the entire dataset. - - :returns: 5, (train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels) - - """ - # cut from /u/louradoj/theano/hpu/expcode1.py - rng = numpy.random.RandomState(1) - samples, labels, infos = Paths().load_append_train_test(downsample_amt=3, dtype=dtype, normalize_pixels=normalize_pixels) - - nsamples = samples.shape[0] - if ntrain + nvalid + ntest > nsamples: - raise Exception("ntrain+nvalid+ntest exceeds number of samples (%i)" % nsamples, - (ntrain, nvalid, ntest)) - i0 = 0 - i1 = ntrain - i2 = ntrain + nvalid - i3 = ntrain + nvalid + ntest - - indices = rng.permutation(nsamples) - train_rows = indices[i0:i1] - valid_rows = indices[i1:i2] - test_rows = indices[i2:i3] - - n_labels = 5 - - def _pick_rows(rows): - a = numpy.array([samples[i].flatten() for i in rows]) - b = numpy.array([labels[i] for i in rows]) - return a, b - - return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)] - -def smallnorb_azSplit(): - # cut from /u/louradoj/theano/hpu/expcode1.py - # WARNING NOT NECESSARILY WORKING CODE - - samples, labels, infos = _load_append_train_test() - train_rows, valid_rows, test_rows = [], [], [] - train_rows_azimuth = [] - for instance in range(10): - az_min = 4*instance - az_max = 4*instance + 18 - train_rows_azimuth.append( [a % 36 for a in range(az_min,az_max,2)] ) - #print "train_rows_azimuth", train_rows_azimuth - for i, info in enumerate(infos): - if info[2] in train_rows_azimuth[info[0]]: - train_rows.append(i) - elif info[2] / 2 % 2 == 0: - test_rows.append(i) - else: - valid_rows.append(i) - - return [_pick_rows(samples, labels, r) for r in (train_rows, valid_rows, test_rows)] diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/README.txt --- a/embeddings/README.txt Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -Messy scripts for working with Jason + Ronan's embeddings. - -Parameters are given in parameters.py diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/__init__.py --- a/embeddings/__init__.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -from process import * diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/convert.py --- a/embeddings/convert.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -#!/usr/bin/python -""" -Convert stdin sentences to word embeddings, and output YAML. 
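Illustrative only (random pixels instead of the smallNorb .mat files): the same downsample-by-striding and divide-by-255 normalization performed in load_append_train_test above, minus the stereo-pair axis:

    import numpy

    def downsample(images, amt):
        # keep every amt-th pixel along both image axes
        return images[:, ::amt, ::amt]

    raw = numpy.random.randint(0, 256, size=(4, 96, 96)).astype('uint8')
    samples = numpy.asarray(downsample(raw, 3), dtype='float64')
    samples *= 1.0 / 255.0                         # pixel values now in [0, 1]
    print(samples.shape, samples.max() <= 1.0)     # (4, 32, 32) True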
-""" - -import sys, string -import read -import yaml - -output = [] -for l in sys.stdin: - l = string.strip(l) - output.append((l, read.convert_string(l))) - -print yaml.dump(output) diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/one-per-line.py --- a/embeddings/one-per-line.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#!/usr/bin/python - -import string -#import psyco - -weightsfile = "lm-weights.txt" -vocabfile = "words.asc" -size = 30000 -dimensions = 50 - -import numpy, math -import sys -from common.str import percent - -word_to_vector = {} - -f = open(weightsfile) -f.readline() -vals = [float(v) for v in string.split(f.readline())] -assert len(vals) == size * dimensions -vals.reverse() -#for i in range(size): -r = range(size) -r.reverse() -for i in r: - l = vals[dimensions*i:dimensions*(i+1)] - print string.join([`s` for s in l], "\t") diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/parameters.py --- a/embeddings/parameters.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -""" -Locations of the embedding data files. -""" -#WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt" -#VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc" -WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt" -VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc" -NUMBER_OF_WORDS = 30000 -DIMENSIONS = 50 -UNKNOWN = "UNKNOWN" diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/process.py --- a/embeddings/process.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -""" -Read in the weights file -""" - -import string -import sys - -from parameters import * - -__words = None -__word_to_embedding = None -__read = False - -def length(): - """ - @return: The length of embeddings - """ - return len(__word_to_embedding[__words[0]]) - -def word_to_embedding(w): - read_embeddings() - return __word_to_embedding[w] - -def read_embeddings(): - global __words - global __word_to_embedding - global __read - if __read: return - - __words = [string.strip(w) for w in open(VOCABFILE).readlines()] - assert len(__words) == NUMBER_OF_WORDS - - import numpy, math - from common.str import percent - - __word_to_embedding = {} - - sys.stderr.write("Reading %s...\n" % WEIGHTSFILE) - f = open(WEIGHTSFILE) - f.readline() - vals = [float(v) for v in string.split(f.readline())] - assert len(vals) == NUMBER_OF_WORDS * DIMENSIONS - for i in range(NUMBER_OF_WORDS): - l = vals[DIMENSIONS*i:DIMENSIONS*(i+1)] - w = __words[i] - __word_to_embedding[w] = l - __read = True - for w in __word_to_embedding: assert len(__word_to_embedding[__words[0]]) == len(__word_to_embedding[w]) - sys.stderr.write("...done reading %s\n" % WEIGHTSFILE) - -import re -numberre = re.compile("[0-9]") -slashre = re.compile("\\\/") - -def preprocess_word(origw): - """ - Convert a word so that it can be embedded directly. - Returned the preprocessed sequence. - @note: Preprocessing is appropriate for Penn Treebank style documents. - #@note: Perhaps run L{common.penntreebank.preprocess} on the word first. 
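A loose, self-contained sketch of the token normalization performed by preprocess_word above (bracket tokens, lowercasing, digit replacement, UNKNOWN fallback); the tiny vocabulary is invented and the slash handling is omitted:

    import re

    number_re = re.compile("[0-9]")
    BRACKETS = {"-LRB-": "(", "-RRB-": ")", "-LCB-": "{",
                "-RCB-": "}", "-LSB-": "[", "-RSB-": "]"}

    def normalize(word, vocabulary, unknown="UNKNOWN"):
        w = BRACKETS.get(word, word)
        if w not in vocabulary:
            # lowercase and map each digit to the literal token NUMBER
            w = number_re.sub("NUMBER", w.lower())
        if w not in vocabulary:
            w = unknown
        return w

    vocab = set(["the", "(", "NUMBER", "UNKNOWN"])
    print([normalize(w, vocab) for w in ["The", "-LRB-", "7", "zyzzyva"]])
    # ['the', '(', 'NUMBER', 'UNKNOWN']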
- """ - read_embeddings() - if origw == "-LRB-": w = "(" - elif origw == "-RRB-": w = ")" - elif origw == "-LCB-": w = "{" - elif origw == "-RCB-": w = "}" - elif origw == "-LSB-": w = "[" - elif origw == "-RSB-": w = "]" - else: - w = origw - if w not in __word_to_embedding: - w = string.lower(w) - w = slashre.sub("/", w) - w = numberre.sub("NUMBER", w) -# if w not in __word_to_embedding: -# w = string.lower(w) -# w = numberre.sub("NUMBER", w) - if w not in __word_to_embedding: -# sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) - w = UNKNOWN - assert w in __word_to_embedding - return w - -def preprocess_seq(l): - """ - Convert a sequence so that it can be embedded directly. - Returned the preprocessed sequence. - @note: Preprocessing is appropriate for Penn Treebank style documents. - """ - read_embeddings() - lnew = [] - for origw in l: - w = preprocess_word(origw) - lnew.append(w) - return lnew - -#def convert_string(s, strict=False): -# """ -# Convert a string to a sequence of embeddings. -# @param strict: If strict, then words *must* be in the vocabulary. -# @todo: DEPRECATED Remove this function. -# """ -# read_embeddings() -# e = [] -# for origw in string.split(string.lower(s)): -# w = numberre.sub("NUMBER", origw) -# if w in __word_to_embedding: -# e.append(__word_to_embedding[w]) -# else: -# sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) -# assert not strict -# e.append(__word_to_embedding[UNKNOWN]) -# return e - -#def test(): -# """ -# Debugging code. -# """ -# read_embeddings() -# for w in __word_to_embedding: -# assert len(__word_to_embedding[w]) == 50 -# import numpy -# for w1 in __words: -# e1 = numpy.asarray(__word_to_embedding[w1]) -# lst = [] -# print w1, numpy.dot(e1, e1) -# for w2 in __word_to_embedding: -# if w1 >= w2: continue -# e2 = numpy.asarray(__word_to_embedding[w2]) -# d = (e1 - e2) -# l2 = numpy.dot(d, d) -# lst.append((l2, w1, w2)) -# lst.sort() -# print lst[:10] -# -#test() diff -r 27b1344a57b1 -r 8fff4bc26f4c embeddings/read-original.py --- a/embeddings/read-original.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -#!/usr/bin/python - -import string -#import psyco - -weightsfile = "lm-weights.txt" -vocabfile = "words.asc" -size = 30000 -dimensions = 50 - -words = [string.strip(w) for w in open(vocabfile).readlines()] -assert len(words) == 30000 - -import numpy, math -import sys -from common.str import percent - -word_to_vector = {} - -f = open(weightsfile) -f.readline() -vals = [float(v) for v in string.split(f.readline())] -assert len(vals) == size * dimensions -vals.reverse() -for i in range(size): - l = vals[dimensions*i:dimensions*(i+1)] - w = words[i] - word_to_vector[w] = l - -# l2 = numpy.asarray(l) -# print math.fabs(50 - numpy.sum(l2*l2)), w - -cnt = 0 -for i1 in range(len(words)): - for i2 in range(len(words)): - w1 = words[i1] - w2 = words[i2] - cnt += 1 - if i1 <= i2: continue - l1 = numpy.asarray(word_to_vector[w1]) - l2 = numpy.asarray(word_to_vector[w2]) - d = l2 - l1 - dist = numpy.sum(d * d) - if dist < 50: - print numpy.sum(d * d), w1, w2, i1, i2 - if cnt % 1000 == 0: - sys.stderr.write("%s done...\n" % percent(cnt, len(word_to_vector) * len(word_to_vector))) diff -r 27b1344a57b1 -r 8fff4bc26f4c examples/linear_classifier.py --- a/examples/linear_classifier.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,224 +0,0 @@ -#! /usr/bin/env python -""" -T. 
Bertin-Mahieux (2008) University of Montreal -bertinmt@iro.umontreal.ca - -linear_classifier.py -Simple script that creates a linear_classifier, and -learns the parameters using backpropagation. - -This is to illustrate how to use theano/pylearn. -Anyone who knows how to make this script simpler/clearer is welcome to -make the modifications. -""" - - -import os -import sys -import time -import copy -import pickle -import numpy -import numpy as N -import numpy.random as NR -from pylearn import cost -import theano -from theano import tensor as T - - -def cost_function(*args,**kwargs) : - """ default cost function, quadratic """ - return cost.quadratic(*args,**kwargs) - - -class modelgraph() : - """ class that contains the graph of the model """ - lr = T.scalar() # learning rate - inputs = T.matrix() # inputs (one example per line) - true_outputs = T.matrix() # outputs (one example per line) - W = T.matrix() # weights input * W + b= output - b = T.vector() # bias - outputs = T.dot(inputs,W) + b # output, one per line - costs = cost_function(true_outputs,outputs) # costs - g_W = T.grad(costs,W) # gradient of W - g_b = T.grad(costs,b) # gradient of b - new_W = T.sub_inplace(W, lr * g_W) # update inplace of W - new_b = T.sub_inplace(b, lr * g_b) # update inplace of b - - -class model() : - """ - The model! - Contains needed matrices, needed functions, and a link to the model graph. - """ - - def __init__(self,input_size,output_size) : - """ init matrix and bias, creates the graph, create a dict of compiled functions """ - # graph - self.graph = modelgraph() - # weights and bias, saved in self.params - seed = 666 - r = NR.RandomState(seed) - W = r.uniform(size = [input_size, output_size], low = -1/N.sqrt(input_size), high = 1/N.sqrt(input_size)) - b = numpy.zeros((output_size, )) - self.params = [W,b] - # dictionary of compiled functions - self.func_dict = dict() - # keep some init_infos (may not be necessary) - self.init_params = [input_size,output_size] - - - def update(self,lr,true_inputs,true_outputs) : - """ does an update of the model, one gradient descent """ - # do we already have the proper theano function? - if self.func_dict.has_key('update_func') : - self.func_dict['update_func'](lr,true_inputs,true_outputs,self.params[0],self.params[1]) - return - else : - # create the theano function, tell him what are the inputs and outputs) - func = theano.function([self.graph.lr,self.graph.inputs,self.graph.true_outputs, - self.graph.W, self.graph.b], - [self.graph.new_W,self.graph.new_b]) - # add function to dictionary, so we don't compile it again - self.func_dict['update_func'] = func - # use this function - func(lr,true_inputs,true_outputs,self.params[0],self.params[1]) - return - - def costs(self,true_inputs,true_outputs) : - """ get the costs for given examples, don't update """ - # do we already have the proper theano function? 
- if self.func_dict.has_key('costs_func') : - return self.func_dict['costs_func'](true_inputs,true_outputs,self.params[0],self.params[1]) - else : - # create the theano function, tell him what are the inputs and outputs) - func = theano.function([self.graph.inputs,self.graph.true_outputs,self.graph.W,self.graph.b], - [self.graph.costs]) - # add function to dictionary, se we don't compile it again - self.func_dict['costs_func'] = func - # use this function - return func(true_inputs,true_outputs,self.params[0],self.params[1]) - - def outputs(self,true_inputs) : - """ get the output for a set of examples (could be called 'predict') """ - # do we already have the proper theano function? - if self.func_dict.has_key('outputs_func') : - return self.func_dict['outputs_func'](true_inputs,self.params[0],self.params[1]) - else : - # create the theano function, tell him what are the inputs and outputs) - func = theano.function([self.graph.inputs, self.graph.W, self.graph.b], - [self.graph.outputs]) - # add function to dictionary, se we don't compile it again - self.func_dict['outputs_func'] = func - # use this function - return func(true_inputs,self.params[0],self.params[1]) - - def __getitem__(self,inputs) : - """ for simplicity, we can use the model this way: predictions = model[inputs] """ - return self.outputs(inputs) - - def __getstate__(self) : - """ - To save/copy the model, used by pickle.dump() and by copy.deepcopy(). - @return a dictionnary with the params (matrix + bias) - """ - d = dict() - d['params'] = self.params - d['init_params'] = self.init_params - return d - - def __setstate__(self,d) : - """ - Get the dictionary created by __getstate__(), use it to recreate the model. - """ - self.params = d['params'] - self.init_params = d['init_params'] - self.graph = modelgraph() # we did not save the model graph - - def __str__(self) : - """ returns a string representing the model """ - res = "Linear regressor, input size =",str(self.init_params[0]) - res += ", output size =", str(self.init_params[1]) - return res - - def __equal__(self,other) : - """ - Compares the model based on the params. 
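A plain-numpy sketch of the gradient-descent update that the script above expresses as a Theano graph and compiles once; a 0.5 * sum-of-squares cost is assumed here, which may differ in detail from cost.quadratic:

    import numpy

    def update(W, b, inputs, targets, lr):
        outputs = inputs.dot(W) + b                 # one example per row
        err = outputs - targets
        g_W = inputs.T.dot(err)                     # gradient of 0.5*sum(err**2)
        g_b = err.sum(axis=0)
        return W - lr * g_W, b - lr * g_b

    rng = numpy.random.RandomState(666)
    inputs = numpy.array([[.1, .2], [.2, .8], [.9, .3], [.6, .5]])
    targets = numpy.array([[0.], [0.], [1.], [1.]])
    W = rng.uniform(-0.7, 0.7, size=(2, 1))
    b = numpy.zeros((1,))
    for _ in range(50):
        W, b = update(W, b, inputs, targets, lr=0.1)
    print(inputs.dot(W) + b)                        # predictions drift toward the targets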
- @return True if the params are the same, False otherwise - """ - # class - if not isinstance(other,model) : - return False - # input size - if self.params[0].shape[0] != other.params[0].shape[0] : - return False - # output size - if self.params[0].shape[1] != other.params[0].shape[1] : - return False - # actual values - if not (self.params[0] == other.params[0]).all(): - return False - if not (self.params[1] == other.params[1]).all(): - return False - # all good - return True - - -def die_with_usage() : - """ help menu """ - print 'simple script to illustrate how to use theano/pylearn' - print 'to launch:' - print ' python linear_classifier.py -launch' - sys.exit(0) - - - -#************************************************************ -# main - -if __name__ == '__main__' : - - if len(sys.argv) < 2 : - die_with_usage() - - # print create data - inputs = numpy.array([[.1,.2], - [.2,.8], - [.9,.3], - [.6,.5]]) - outputs = numpy.array([[0], - [0], - [1], - [1]]) - assert inputs.shape[0] == outputs.shape[0] - - # create model - m = model(2,1) - - # predict - print 'prediction before training:' - print m[inputs] - - # update it for 100 iterations - for k in range(50) : - m.update(.1,inputs,outputs) - - # predict - print 'prediction after training:' - print m[inputs] - - # show points - import pylab as P - colors = outputs.flatten().tolist() - x = inputs[:,0] - y = inputs[:,1] - P.plot(x[numpy.where(outputs==0)[0]],y[numpy.where(outputs==0)[0]],'r+') - P.plot(x[numpy.where(outputs==1)[0]],y[numpy.where(outputs==1)[0]],'b+') - # decision line - p1 = (.5 - m.params[1] * 1.) / m.params[0][1,0] # abs = 0 - p2 = (.5 - m.params[1] * 1.) / m.params[0][0,0] # ord = 0 - P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-') - # show - P.axis([-1,2,-1,2]) - P.show() - diff -r 27b1344a57b1 -r 8fff4bc26f4c examples/theano_update.py --- a/examples/theano_update.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -import theano -from theano import tensor - -import numpy - -# Two scalar symbolic variables -a = tensor.scalar() -b = tensor.scalar() - -# Definition of output symbolic variable -c = a * b -# Definition of the function computing it -fprop = theano.function([a,b], [c]) - -# Initialize numerical variables -a_val = numpy.array(12.) -b_val = numpy.array(2.) -print 'a_val =', a_val -print 'b_val =', b_val - -# Numerical value of output is returned by the call to "fprop" -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - - -# Definition of simple update (increment by one) -new_b = b + 1 -update = theano.function([b], [new_b]) - -# New numerical value of b is returned by the call to "update" -b_val = update(b_val) -print 'new b_val =', b_val -# We can use the new value in "fprop" -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - - -# Definition of in-place update (increment by one) -re_new_b = tensor.add_inplace(b, 1.) -re_update = theano.function([b], [re_new_b]) - -# "re_update" can be used the same way as "update" -b_val = re_update(b_val) -print 'new b_val =', b_val -# We can use the new value in "fprop" -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - -# It is not necessary to keep the return value when the update is done in place -re_update(b_val) -print 'new b_val =', b_val -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c exceptions.py --- a/exceptions.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -""" -Common exceptions. 
-@todo: This file should be part of a common/ python package. -""" - -class AbstractFunction (Exception): """Derived class must override this function""" -class NotImplementedYet (NotImplementedError): """Work in progress, this should eventually be implemented""" diff -r 27b1344a57b1 -r 8fff4bc26f4c external/wrap_libsvm.py --- a/external/wrap_libsvm.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -"""Run an experiment using libsvm. -""" -import numpy -from ..datasets import dataset_from_descr - -# libsvm currently has no python installation instructions/convention. -# -# This module uses a specific convention for libsvm's installation. -# I base this on installing libsvm-2.88. -# To install libsvm's python module, do three things: -# 1. Build libsvm (run make in both the root dir and the python subdir). -# 2. touch a '__init__.py' file in the python subdir -# 3. add a symbolic link to a PYTHONPATH location that looks like this: -# libsvm -> /libsvm-2.88/python/ -# -# That is the sort of thing that this module expects from 'import libsvm' - -import libsvm - -def score_01(x, y, model): - assert len(x) == len(y) - size = len(x) - errors = 0 - for i in range(size): - prediction = model.predict(x[i]) - #probability = model.predict_probability - if (y[i] != prediction): - errors = errors + 1 - return float(errors)/size - -#this is the dbdict experiment interface... if you happen to use dbdict -class State(object): - #TODO: parametrize to get all the kernel types, not hardcode for RBF - dataset = 'MNIST_1k' - C = 10.0 - kernel = 'RBF' - # rel_gamma is related to the procedure Jerome used. He mentioned why in - # quadratic_neurons/neuropaper/draft3.pdf. - rel_gamma = 1.0 - - def __init__(self, **kwargs): - for k, v in kwargs: - setattr(self, k, type(getattr(self, k))(v)) - - -def dbdict_run_svm_experiment(state, channel=lambda *args, **kwargs:None): - """Parameters are described in state, and returned in state. - - :param state: object instance to store parameters and return values - :param channel: not used - - :returns: None - - This is the kind of function that dbdict-run can use. - - """ - ((train_x, train_y), (valid_x, valid_y), (test_x, test_y)) = dataset_from_descr(state.dataset) - - #libsvm needs stuff in int32 on a 32bit machine - #TODO: test this on a 64bit machine - train_y = numpy.asarray(train_y, dtype='int32') - valid_y = numpy.asarray(valid_y, dtype='int32') - test_y = numpy.asarray(test_y, dtype='int32') - problem = svm.svm_problem(train_y, train_x); - - gamma0 = 0.5 / numpy.sum(numpy.var(train_x, axis=0)) - - param = svm.svm_parameter(C=state.C, - kernel_type=getattr(svm, state.kernel), - gamma=state.rel_gamma * gamma0) - - model = svm.svm_model(problem, param) #this is the expensive part - - state.train_01 = score_01(train_x, train_y, model) - state.valid_01 = score_01(valid_x, valid_y, model) - state.test_01 = score_01(test_x, test_y, model) - - state.n_train = len(train_y) - state.n_valid = len(valid_y) - state.n_test = len(test_y) - -def run_svm_experiment(**kwargs): - """Python-friendly interface to dbdict_run_svm_experiment - - Parameters are used to construct a `State` instance, which is returned after running - `dbdict_run_svm_experiment` on it. - - .. code-block:: python - results = run_svm_experiment(dataset='MNIST_1k', C=100.0, rel_gamma=0.01) - print results.n_train - # 1000 - print results.valid_01, results.test_01 - # 0.14, 0.10 #.. or something... 
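A standalone sketch of the 0-1 scoring idea in score_01 above, with a trivial stand-in for model.predict:

    import numpy

    def score_01(x, y, predict):
        # fraction of examples whose predicted class differs from the label
        errors = sum(1 for xi, yi in zip(x, y) if predict(xi) != yi)
        return float(errors) / len(x)

    def predict(xi):                      # stand-in for model.predict
        return int(xi[0] > 0.5)

    x = numpy.array([[0.0], [1.0], [2.0], [3.0]])
    y = numpy.array([0, 0, 1, 1], dtype='int32')
    print(score_01(x, y, predict))        # 0.25 (one of four examples misclassified)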
- - """ - state = State(**kwargs) - state_run_svm_experiment(state) - return state - diff -r 27b1344a57b1 -r 8fff4bc26f4c filetensor.py --- a/filetensor.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,141 +0,0 @@ -""" -Read and write the matrix file format described at -U{http://www.cs.nyu.edu/~ylclab/data/norb-v1.0/index.html} - -The format is for dense tensors: - - - magic number indicating type and endianness - 4bytes - - rank of tensor - int32 - - dimensions - int32, int32, int32, ... - - - -The number of dimensions and rank is slightly tricky: - - for scalar: rank=0, dimensions = [1, 1, 1] - - for vector: rank=1, dimensions = [?, 1, 1] - - for matrix: rank=2, dimensions = [?, ?, 1] - -For rank >= 3, the number of dimensions matches the rank exactly. - - -@todo: add complex type support - -""" -import sys -import numpy - -def _prod(lst): - p = 1 - for l in lst: - p *= l - return p - -_magic_dtype = { - 0x1E3D4C51 : ('float32', 4), - #0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix? - 0x1E3D4C53 : ('float64', 8), - 0x1E3D4C54 : ('int32', 4), - 0x1E3D4C55 : ('uint8', 1), - 0x1E3D4C56 : ('int16', 2), - } -_dtype_magic = { - 'float32': 0x1E3D4C51, - #'packed matrix': 0x1E3D4C52, - 'float64': 0x1E3D4C53, - 'int32': 0x1E3D4C54, - 'uint8': 0x1E3D4C55, - 'int16': 0x1E3D4C56 - } - -# -# TODO: implement item selection: -# e.g. load('some mat', subtensor=(:6, 2:5)) -# -# This function should be memory efficient by: -# - allocating an output matrix at the beginning -# - seeking through the file, reading subtensors from multiple places -def read(f, subtensor=None, debug=False): - """Load all or part of file 'f' into a numpy ndarray - - @param f: file from which to read - @type f: file-like object - - If subtensor is not None, it should be like the argument to - numpy.ndarray.__getitem__. The following two expressions should return - equivalent ndarray objects, but the one on the left may be faster and more - memory efficient if the underlying file f is big. - - read(f, subtensor) <===> read(f)[*subtensor] - - Support for subtensors is currently spotty, so check the code to see if your - particular type of subtensor is supported. - - """ - def _read_int32(f): - s = f.read(4) - s_array = numpy.fromstring(s, dtype='int32') - return s_array.item() - - #what is the data type of this matrix? - #magic_s = f.read(4) - #magic = numpy.fromstring(magic_s, dtype='int32') - magic = _read_int32(f) - magic_t, elsize = _magic_dtype[magic] - if debug: - print 'header magic', magic, magic_t, elsize - if magic_t == 'packed matrix': - raise NotImplementedError('packed matrix not supported') - - #what is the rank of the tensor? - ndim = _read_int32(f) - if debug: print 'header ndim', ndim - - #what are the dimensions of the tensor? 
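A stripped-down, self-contained sketch of the filetensor header layout described at the top of this file (magic number, rank, at least three int32 dimensions, then the raw elements); it assumes native byte order and float64 data, and is only an illustration, not a drop-in replacement for read/write:

    import numpy
    from io import BytesIO

    MAGIC_FLOAT64 = 0x1E3D4C53              # magic for float64 from the table above

    def write_ft(f, mat):
        mat = numpy.asarray(mat, dtype='float64')
        header = [MAGIC_FLOAT64, mat.ndim] + list(mat.shape) + [1] * max(0, 3 - mat.ndim)
        f.write(numpy.asarray(header, dtype='int32').tobytes())
        f.write(mat.tobytes())

    def read_ft(f):
        magic, ndim = numpy.frombuffer(f.read(8), dtype='int32')
        assert magic == MAGIC_FLOAT64
        dims = numpy.frombuffer(f.read(4 * max(ndim, 3)), dtype='int32')[:ndim]
        return numpy.frombuffer(f.read(), dtype='float64').reshape(dims)

    buf = BytesIO()
    mat = numpy.arange(6, dtype='float64').reshape(2, 3)
    write_ft(buf, mat)
    buf.seek(0)
    print((read_ft(buf) == mat).all())      # True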
- dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim] - dim_size = _prod(dim) - if debug: print 'header dim', dim, dim_size - - rval = None - if subtensor is None: - rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim) - elif isinstance(subtensor, slice): - if subtensor.step not in (None, 1): - raise NotImplementedError('slice with step', subtensor.step) - if subtensor.start not in (None, 0): - bytes_per_row = _prod(dim[1:]) * elsize - raise NotImplementedError('slice with start', subtensor.start) - dim[0] = min(dim[0], subtensor.stop) - rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim) - else: - raise NotImplementedError('subtensor access not written yet:', subtensor) - - return rval - -def write(f, mat): - """Write a numpy.ndarray to file. - - @param f: file into which to write - @type f: file-like object - - @param mat: array to write to file - @type mat: numpy ndarray or compatible - - """ - def _write_int32(f, i): - i_array = numpy.asarray(i, dtype='int32') - if 0: print 'writing int32', i, i_array - i_array.tofile(f) - - try: - _write_int32(f, _dtype_magic[str(mat.dtype)]) - except KeyError: - raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype) - - _write_int32(f, len(mat.shape)) - shape = mat.shape - if len(shape) < 3: - shape = list(shape) + [1] * (3 - len(shape)) - if 0: print 'writing shape =', shape - for sh in shape: - _write_int32(f, sh) - mat.tofile(f) - diff -r 27b1344a57b1 -r 8fff4bc26f4c image_tools.py --- a/image_tools.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ - -import numpy - - -def make_weights_image(mat, xres, yres, i, j, nrow, ncol): - """ - Displays the filters implemented by a weight matrix. - - Each filter corresponds to a row of mat and will be represented - by a xres*yres image. - - Units from i to j will be included in the picture. - - The picture will have nrow rows of filters and ncol columns - of filters. Unused spots for filters will be filled with zeros. - - The return value is a matrix suitable for display with - matplotlib's imshow. - """ - - assert j > i - n = j - i - result = numpy.zeros((ncol * xres, nrow * yres)) - submat = mat[i:j] - for k, row in enumerate(submat): - x = (k % ncol)*xres - y = (k / ncol)*yres - entry = row.reshape((xres, yres)) - lmin, lmax = numpy.min(entry), numpy.max(entry) - ldiff = lmax - lmin - #entry = (entry - lmin) / ldiff - result[x:x + xres, y:y + yres] = entry - return result.T - - - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c kernel_regression.py --- a/kernel_regression.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,231 +0,0 @@ -""" -Implementation of kernel regression: -""" - -from pylearn.learner import OfflineLearningAlgorithm -from theano import tensor as T -from theano.tensor.nnet import prepend_1_to_each_row -from theano.scalar import as_scalar -from common.autoname import AutoName -import theano -import numpy - -# map a N-vector to a 1xN matrix -row_vector = theano.tensor.DimShuffle((False,),['x',0]) -# map a N-vector to a Nx1 matrix -col_vector = theano.tensor.DimShuffle((False,),[0,'x']) - -class KernelRegression(OfflineLearningAlgorithm): - """ -Implementation of kernel regression: -* the data are n (x_t,y_t) pairs and we want to estimate E[y|x] -* the predictor computes - f(x) = b + \sum_{t=1}^n \alpha_t K(x,x_t) - with free parameters b and alpha, training inputs x_t, - and kernel function K (gaussian by default). 
- Clearly, each prediction involves O(n) computations. -* the learner chooses b and alpha to minimize - lambda alpha' G' G alpha + \sum_{t=1}^n (f(x_t)-y_t)^2 - where G is the matrix with entries G_ij = K(x_i,x_j). - The first (L2 regularization) term is the squared L2 - norm of the primal weights w = \sum_t \alpha_t phi(x_t) - where phi is the function s.t. K(u,v)=phi(u).phi(v). -* this involves solving a linear system with (n+1,n+1) - matrix, which is an O(n^3) computation. In addition, - that linear system matrix requires O(n^2) memory. - So this learning algorithm should be used only for - small datasets. -* the linear system is - (M + lambda I_n) theta = (1, y)' - where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity - except with a 0 at (0,0), M is the matrix with G in the sub-matrix starting - at (1,1), 1's in column 0, except for a value of n at (0,0), and sum_i G_{i,j} - in the rest of row 0. - -Note that this is gives an estimate of E[y|x,training_set] that is the -same as obtained with a Gaussian process regression. The GP -regression would also provide a Bayesian Var[y|x,training_set]. -It corresponds to an assumption that f is a random variable -with Gaussian (process) prior distribution with covariance -function K. Because we assume Gaussian noise we obtain a Gaussian -posterior for f (whose mean is computed here). - - - Usage: - - kernel_regressor=KernelRegression(L2_regularizer=0.1,gamma=0.5) (kernel=GaussianKernel(gamma=0.5)) - kernel_predictor=kernel_regressor(training_set) - all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" field - outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays - outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets) - errors = kernel_predictor.compute_errors(inputs,targets) - mse = kernel_predictor.compute_mse(inputs,targets) - - - - The training_set must have fields "input" and "target". - The test_set must have field "input", and needs "target" if - we want to compute the squared errors. - - The predictor parameters are obtained analytically from the training set. - Training is only done on a whole training set rather than on minibatches - (no online implementation). 
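For intuition, a small numpy sketch of kernel ridge regression in the same spirit as the description above; it drops the bias term and uses the standard lambda * alpha' G alpha regularizer, so it is not the exact linear system given here:

    import numpy

    def gaussian_kernel(A, B, gamma):
        # K[i, j] = exp(-gamma * ||A[i] - B[j]||^2)
        d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
        return numpy.exp(-gamma * d2)

    def fit(train_x, train_y, gamma, lam):
        # alpha = (G + lam*I)^-1 y   (no bias term)
        G = gaussian_kernel(train_x, train_x, gamma)
        return numpy.linalg.solve(G + lam * numpy.eye(len(train_x)), train_y)

    def predict(x, train_x, alpha, gamma):
        # f(x) = sum_t alpha_t K(x, x_t)
        return gaussian_kernel(x, train_x, gamma).dot(alpha)

    rng = numpy.random.RandomState(0)
    train_x = rng.rand(20, 1)
    train_y = numpy.sin(3.0 * train_x).ravel()
    alpha = fit(train_x, train_y, gamma=10.0, lam=1e-3)
    print(abs(predict(train_x, train_x, alpha, gamma=10.0) - train_y).max())  # small training error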
- - The dataset fields expected and produced by the learning algorithm and the trained model - are the following: - - - Input and output dataset fields (example-wise quantities): - - - 'input' (always expected as an input_dataset field) - - 'target' (always expected by the learning algorithm, optional for learned model) - - 'output' (always produced by learned model) - - 'squared_error' (optionally produced by learned model if 'target' is provided) - = example-wise squared error - """ - def __init__(self, kernel=None, L2_regularizer=0, gamma=1, use_bias=False): - # THE VERSION WITH BIAS DOES NOT SEEM RIGHT - self.kernel = kernel - self.L2_regularizer=L2_regularizer - self.use_bias=use_bias - self.gamma = gamma # until we fix things, the kernel type is fixed, Gaussian - self.equations = KernelRegressionEquations() - - def __call__(self,trainset): - n_examples = len(trainset) - first_example = trainset[0] - n_inputs = first_example['input'].size - n_outputs = first_example['target'].size - b1=1 if self.use_bias else 0 - M = numpy.zeros((n_examples+b1,n_examples+b1)) - Y = numpy.zeros((n_examples+b1,n_outputs)) - for i in xrange(n_examples): - M[i+b1,i+b1]=self.L2_regularizer - data = trainset.fields() - train_inputs = numpy.array(data['input']) - if self.use_bias: - Y[0]=1 - Y[b1:,:] = numpy.array(data['target']) - train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma) - M[b1:,b1:] += G - if self.use_bias: - M[0,1:] = sumG - M[1:,0] = 1 - M[0,0] = M.shape[0] - self.M=M - self.Y=Y - theta=numpy.linalg.solve(M,Y) - return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square) - -class KernelPredictorEquations(AutoName): - train_inputs = T.matrix() # n_examples x n_inputs - train_inputs_square = T.vector() # n_examples - inputs = T.matrix() # minibatchsize x n_inputs - targets = T.matrix() # minibatchsize x n_outputs - theta = T.matrix() # (n_examples+1) x n_outputs - b1 = T.shape(train_inputs_square)[0]>> example = LookupList(['x','y','z'],[1,2,3]) - >>> example['x'] = [1, 2, 3] # set or change a field - >>> print example('z','y') # prints [3,2] - >>> x, y, z = example - >>> x = example[0] - >>> x = example["x"] - >>> print example.keys() # prints ['x','y','z'] - >>> print example.values() # prints [[1,2,3],2,3] - >>> print example.items() # prints [('x',[1,2,3]),('y',2),('z',3)] - >>> example.append_keyval('u',0) # adds item with name 'u' and value 0 - >>> print len(example) # number of items = 4 here - >>> example2 = LookupList(['v', 'w'], ['a','b']) - >>> print example+example2 # addition is like for lists, a concatenation of the items. - >>> example + example # throw an error as we can't have duplicate name. - - @note: The element names should be unique. - - @todo: Convert this documentation into doctest - format, and actually perform doctest'ing: - U{http://epydoc.sourceforge.net/manual-epytext.html#doctest-blocks} - """ - def __init__(self,names=[],values=[]): - #print 'values=', values - #print 'length=', len(values) - #print 'names=', names - #print 'length=',len(names) - assert len(values)==len(names) - self.__dict__['_values']=values - self.__dict__['_name2index']={} - self.__dict__['_names']=names - for i in xrange(len(values)): - assert names[i] not in self._name2index - self._name2index[names[i]]=i - - def keys(self): - return self._names - - def values(self): - return self._values - - def items(self): - """ - Return a list of (name,value) pairs of all the items in the look-up list. 
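For comparison, most of the dict-like behaviour documented for LookupList above is what collections.OrderedDict already provides; what it does not give is positional indexing or the '+' concatenation:

    from collections import OrderedDict

    example = OrderedDict([('x', [1, 2, 3]), ('y', 2), ('z', 3)])
    print(list(example.keys()))       # ['x', 'y', 'z']
    print(list(example.values()))     # [[1, 2, 3], 2, 3]
    print(list(example.items()))      # [('x', [1, 2, 3]), ('y', 2), ('z', 3)]
    example['u'] = 0                  # plays the role of append_keyval('u', 0)
    print(len(example))               # 4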
- """ - return zip(self._names,self._values) - - def __getitem__(self,key): - """ - The key in example[key] can either be an integer to index the fields - or the name of the field. - """ - if isinstance(key,int) or isinstance(key,slice) or (isinstance(key,list) and all([isinstance(i,int) for i in key])): - return self._values[key] - else: # if not an int, key must be a name - # expecting key to be a valid field name - assert isinstance(key,str) - return self._values[self._name2index[key]] - - def __setitem__(self,key,value): - if isinstance(key,int): - self._values[key]=value - else: # if not an int, key must be a name - if key in self._name2index: - self._values[self._name2index[key]]=value - else: - self.append_keyval(key,value) - - def append_keyval(self, key, value): - assert key not in self._name2index - self._name2index[key]=len(self) - self._values.append(value) - self._names.append(key) - - def append_lookuplist(self, *list): - for l in list: - for key in l.keys(): - self.append_keyval(key,l[key]) - del l - - def __len__(self): - return len(self._values) - - def __repr__(self): - return "{%s}" % ", ".join([str(k) + "=" + repr(v) for k,v in self.items()]) - - def __add__(self,rhs): - new_example = deepcopy(self) - for item in rhs.items(): - new_example.append_keyval(item[0],item[1]) - return new_example - - def __radd__(self,lhs): - new_example = deepcopy(lhs) - for item in self.items(): - new_example.append_keyval(item[0],item[1]) - return new_example - - def __eq__(self, other): - return self._values==other._values and self._name2index==other._name2index and self._names==other._names - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - raise NotImplementedError() - - def __call__(self,*names): - """ - Return a list of values associated with the given names (which must all be keys of the lookup list). - """ - if names == self._names: - return self._values - return [self[name] for name in names] - - -if __name__ == '__main__': - - a=LookupList(['a'],[1]) - print a - b=LookupList(['b'],[2]) - print b - a.append_lookuplist(b) - print a - a.append_lookuplist(b) - print a diff -r 27b1344a57b1 -r 8fff4bc26f4c make_test_datasets.py --- a/make_test_datasets.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -from dataset import ArrayDataSet -from shapeset.dset import Polygons -from linear_regression import linear_predictor -from kernel_regression import kernel_predictor -from numpy import * - -""" -General-purpose code to generate artificial datasets that can be used -to test different learning algorithms. -""" - - -def make_triangles_rectangles_online_dataset(image_size=(10,10)): - """ - Make a binary classification dataset to discriminate triangle images from rectangle images. - """ - def convert_dataset(dset): - # convert the n_vert==3 into target==0 and n_vert==4 into target==1 - def mapf(images,n_vertices): - n=len(n_vertices) - targets = ndarray((n,1),dtype='float64') - for i in xrange(n): - targets[i,0] = array([0. 
if n_vertices[i]==3 else 1.],dtype='float64') - return images.reshape(len(images),images[0].size).astype('float64'),targets - return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]) - - p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) - trainset=convert_dataset(p) - return trainset - - -def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True): - """ - Make a binary classification dataset to discriminate triangle images from rectangle images. - """ - def convert_dataset(dset): - # convert the n_vert==3 into target==0 and n_vert==4 into target==1 - def mapf(images,n_vertices): - n=len(n_vertices) - targets = ndarray((n,1),dtype='float64') - for i in xrange(n): - targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') - return images.reshape(len(images),images[0].size).astype('float64'),targets - return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache) - - p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) - data = p.subset[0:n_examples] - trainset=convert_dataset(data.subset[0:n_examples]) - return trainset - - -def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True): - """ - Make two binary classification datasets to discriminate triangle images from rectangle images. - The first one is the training set, the second is the test set. - """ - data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache) - n_train = int(n_examples*train_frac) - trainset=convert_dataset(data.subset[0:n_train]) - testset=convert_dataset(data.subset[n_train:n_examples]) - return trainset,testset - - -def make_artificial_datasets_from_function(n_inputs=1, - n_targets=1, - n_examples=20, - train_frac=0.5, - noise_level=0.1, # add Gaussian noise, noise_level=sigma - params_shape=None, - f=None, # function computing E[Y|X] - otherargs=None, # extra args to f - b=None): # force theta[0] with this value - """ - Make regression data of the form - Y | X ~ Normal(f(X,theta,otherargs),noise_level^2) - If n_inputs==1 then X is chosen at regular locations on the [-1,1] interval. - Otherwise X is sampled according to a Normal(0,1) on all dimensions (independently). - The parameters theta is a matrix of shape params_shape that is sampled from Normal(0,1). - Optionally theta[0] is set to the argument 'b', if b is provided. - - Return a training set and a test set, by splitting the generated n_examples - according to the 'train_frac'tion. 
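A condensed numpy sketch of the linear special case described above: sample inputs and parameters from a normal distribution, add Gaussian noise to the targets, and split by train_frac (all names here are illustrative):

    import numpy

    def make_linear_regression_data(n_examples=20, n_inputs=3, n_targets=1,
                                    noise_level=0.1, train_frac=0.5, seed=0):
        rng = numpy.random.RandomState(seed)
        inputs = rng.normal(size=(n_examples, n_inputs))
        theta = rng.normal(size=(n_inputs + 1, n_targets))   # row 0 is the bias
        outputs = theta[0] + inputs.dot(theta[1:])           # E[Y|X]
        targets = outputs + rng.normal(scale=noise_level, size=outputs.shape)
        data = numpy.hstack((inputs, targets))               # input columns, then targets
        n_train = int(train_frac * n_examples)
        return data[:n_train], data[n_train:], theta

    trainset, testset, theta = make_linear_regression_data()
    print(trainset.shape, testset.shape)                     # (10, 4) (10, 4)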
- """ - n_train=int(train_frac*n_examples) - n_test=n_examples-n_train - if n_inputs==1: - delta1=2./n_train - delta2=2./n_test - inputs = vstack((array(zip(range(n_train)))*delta1-1, - 0.5*delta2+array(zip(range(n_test)))*delta2-1)) - else: - inputs = random.normal(size=(n_examples,n_inputs)) - if not f: - f = linear_predictor - if f==kernel_predictor and not otherargs[1]: - otherargs=(otherargs[0],inputs[0:n_train]) - if not params_shape: - if f==linear_predictor: - params_shape = (n_inputs+1,n_targets) - elif f==kernel_predictor: - params_shape = (otherargs[1].shape[0]+1,n_targets) - theta = random.normal(size=params_shape) if params_shape else None - if b: - theta[0]=b - outputs = f(inputs,theta,otherargs) - targets = outputs + random.normal(scale=noise_level,size=(n_examples,n_targets)) - # the | stacking creates a strange bug in LookupList constructor: - # trainset = ArrayDataSet(inputs[0:n_examples/2],{'input':slice(0,n_inputs)}) | \ - # ArrayDataSet(targets[0:n_examples/2],{'target':slice(0,n_targets)}) - # testset = ArrayDataSet(inputs[n_examples/2:],{'input':slice(0,n_inputs)}) | \ - # ArrayDataSet(targets[n_examples/2:],{'target':slice(0,n_targets)}) - data = hstack((inputs,targets)) - - trainset = ArrayDataSet(data[0:n_train], - {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)}) - testset = ArrayDataSet(data[n_train:], - {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)}) - return trainset,testset,theta diff -r 27b1344a57b1 -r 8fff4bc26f4c misc_theano.py --- a/misc_theano.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ - -import theano - -class Print(theano.Op): - def __init__(self,message=""): - self.message=message - self.view_map={0:[0]} - - def make_node(self,xin): - xout = xin.type.make_result() - return theano.Apply(op = self, inputs = [xin], outputs=[xout]) - - def perform(self,node,inputs,output_storage): - xin, = inputs - xout, = output_storage - xout[0] = xin - print self.message,xin - - def grad(self,input,output_gradients): - return output_gradients diff -r 27b1344a57b1 -r 8fff4bc26f4c mlp_factory_approach.py --- a/mlp_factory_approach.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,384 +0,0 @@ -import copy, sys, os -import numpy - -import theano -from theano import tensor as T - -import dataset, nnet_ops, stopper, filetensor -from pylearn.lookup_list import LookupList - - -class AbstractFunction (Exception): pass - -class AutoName(object): - """ - By inheriting from this class, class variables which have a name attribute - will have that name attribute set to the class variable name. - """ - class __metaclass__(type): - def __init__(cls, name, bases, dct): - type.__init__(name, bases, dct) - for key, val in dct.items(): - assert type(key) is str - if hasattr(val, 'name'): - val.name = key - -class GraphLearner(object): - class Model(object): - def __init__(self, algo, params): - self.algo = algo - self.params = params - graph = self.algo.graph - self.update_fn = algo._fn([graph.input, graph.target] + graph.params, - [graph.nll] + graph.new_params) - self._fn_cache = {} - - def __copy__(self): - raise Exception('why not called?') - return GraphLearner.Model(self.algo, [copy.copy(p) for p in params]) - - def __eq__(self,other,tolerance=0.) : - """ Only compares weights of matrices and bias vector. 
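A standalone sketch of the tolerance-based parameter comparison that Model.__eq__ implements just below, using plain numpy arrays:

    import numpy

    def params_equal(params_a, params_b, tolerance=0.0):
        # same number of arrays, same shapes, element-wise difference within tolerance
        if len(params_a) != len(params_b):
            return False
        for a, b in zip(params_a, params_b):
            a, b = numpy.asarray(a), numpy.asarray(b)
            if a.shape != b.shape or not numpy.all(numpy.abs(a - b) <= tolerance):
                return False
        return True

    p = [numpy.zeros((2, 3)), numpy.zeros(3)]
    q = [numpy.zeros((2, 3)) + 1e-9, numpy.zeros(3)]
    print(params_equal(p, q), params_equal(p, q, tolerance=1e-6))   # False True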
""" - if not isinstance(other,GraphLearner.Model) : - return False - for p in range(4) : - if self.params[p].shape != other.params[p].shape : - return False - if not numpy.all( numpy.abs(self.params[p] - other.params[p]) <= tolerance ) : - return False - return True - - def _cache(self, key, valfn): - d = self._fn_cache - if key not in d: - d[key] = valfn() - return d[key] - - def update_minibatch(self, minibatch): - if not isinstance(minibatch, LookupList): - print type(minibatch) - assert isinstance(minibatch, LookupList) - self.update_fn(minibatch['input'], minibatch['target'], *self.params) - - def update(self, dataset, - default_minibatch_size=32): - """ - Update this model from more training data.Uses all the data once, cut - into minibatches. No early stopper here. - """ - params = self.params - minibatch_size = min(default_minibatch_size, len(dataset)) - for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size): - self.update_minibatch(mb) - - def save(self, f): - self.algo.graph.save(f, self) - - def __call__(self, testset, fieldnames=['output_class']): - """Apply this model (as a function) to new data. - - @param testset: DataSet, whose fields feed Result terms in self.algo.g - @type testset: DataSet - - @param fieldnames: names of results in self.algo.g to compute. - @type fieldnames: list of strings - - @return: DataSet with fields from fieldnames, computed from testset by - this model. - @rtype: ApplyFunctionDataSet instance - - """ - graph = self.algo.graph - def getresult(name): - r = getattr(graph, name) - if not isinstance(r, theano.Result): - raise TypeError('string does not name a theano.Result', (name, r)) - return r - - provided = [getresult(name) for name in testset.fieldNames()] - wanted = [getresult(name) for name in fieldnames] - inputs = provided + graph.params - - theano_fn = self._cache((tuple(inputs), tuple(wanted)), - lambda: self.algo._fn(inputs, wanted)) - lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) - return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) - - class Graph(object): - class Opt(object): - merge = theano.gof.MergeOptimizer() - gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1) - sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub( - (T.mul,'x', 'x'), - (T.sqr, 'x'))) - - def __init__(self, do_sqr=True): - self.do_sqr = do_sqr - - def __call__(self, env): - self.merge(env) - self.gemm_opt_1(env) - if self.do_sqr: - self.sqr_opt_0(env) - self.merge(env) - - def linker(self): - return theano.gof.PerformLinker() - - def early_stopper(self): - stopper.NStages(300,1) - - def train_iter(self, trainset): - raise AbstractFunction - optimizer = Opt() - - def load(self,f) : - raise AbstractFunction - - def save(self,f,model) : - raise AbstractFunction - - - def __init__(self, graph): - self.graph = graph - - def _fn(self, inputs, outputs): - # Caching here would hamper multi-threaded apps - # prefer caching in Model.__call__ - return theano.function(inputs, outputs, - unpack_single=False, - optimizer=self.graph.optimizer, - linker=self.graph.linker() if hasattr(self.graph, 'linker') - else 'c|py') - - def __call__(self, - trainset=None, - validset=None, - iparams=None, - stp=None): - """Allocate and optionally train a model - - @param trainset: Data for minimizing the cost function - @type trainset: None or Dataset - - @param validset: Data for early stopping - @type validset: None or Dataset - - @param input: name of field to use as input - @type input: string - - @param 
target: name of field to use as target - @type target: string - - @param stp: early stopper, if None use default in graphMLP.G - @type stp: None or early stopper - - @return: model - @rtype: GraphLearner.Model instance - - """ - - iparams = self.graph.iparams() if iparams is None else iparams - - # if we load, type(trainset) == 'str' - if isinstance(trainset,str) or isinstance(trainset,file): - #loadmodel = GraphLearner.Model(self, iparams) - loadmodel = self.graph.load(self,trainset) - return loadmodel - - curmodel = GraphLearner.Model(self, iparams) - best = curmodel - - if trainset is not None: - #do some training by calling Model.update_minibatch() - if stp == None : - stp = self.graph.early_stopper() - try : - countiter = 0 - for mb in self.graph.train_iter(trainset): - curmodel.update_minibatch(mb) - if stp.set_score: - if validset: - stp.score = curmodel(validset, ['validset_score']) - if (stp.score < stp.best_score): - best = copy.copy(curmodel) - else: - stp.score = 0.0 - countiter +=1 - stp.next() - except StopIteration : - print 'Iterations stopped after ', countiter,' iterations' - if validset: - curmodel = best - return curmodel - - -def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0): - - - def wrapper(i, node, thunk): - if 0: - print i, node - print thunk.inputs - print thunk.outputs - if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias: - print 'here is the nll op' - thunk() #actually compute this piece of the graph - - class G(GraphLearner.Graph, AutoName): - - lr = T.constant(lr_val) - assert l2coef_val == 0.0 - l2coef = T.constant(l2coef_val) - input = T.matrix() # n_examples x n_inputs - target = T.ivector() # len: n_examples - #target = T.matrix() - W2, b2 = T.matrix(), T.vector() - - W1, b1 = T.matrix(), T.vector() - hid = T.tanh(b1 + T.dot(input, W1)) - hid_regularization = l2coef * T.sum(W1*W1) - - params = [W1, b1, W2, b2] - activations = b2 + T.dot(hid, W2) - nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target ) - regularization = l2coef * T.sum(W2*W2) + hid_regularization - output_class = T.argmax(activations,1) - loss_01 = T.neq(output_class, target) - #g_params = T.grad(nll + regularization, params) - g_params = T.grad(nll, params) - new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] - - - def __eq__(self,other) : - print 'G.__eq__ from graphMLP(), not implemented yet' - return NotImplemented - - - def load(self, algo, f): - """ Load from file the 2 matrices and bias vectors """ - cloase_at_end = False - if isinstance(f,str) : - f = open(f,'r') - close_at_end = True - params = [] - for i in xrange(4): - params.append(filetensor.read(f)) - if close_at_end : - f.close() - return GraphLearner.Model(algo, params) - - def save(self, f, model): - """ Save params to file, so 2 matrices and 2 bias vectors. Same order as iparams. """ - cloase_at_end = False - if isinstance(f,str) : - f = open(f,'w') - close_at_end = True - for p in model.params: - filetensor.write(f,p) - if close_at_end : - f.close() - - - def iparams(self): - """ init params. 
""" - def randsmall(*shape): - return (numpy.random.rand(*shape) -0.5) * 0.001 - return [randsmall(ninputs, nhid) - , randsmall(nhid) - , randsmall(nhid, nclass) - , randsmall(nclass)] - - def train_iter(self, trainset): - return trainset.minibatches(['input', 'target'], - minibatch_size=min(len(trainset), 32), n_batches=2000) - def early_stopper(self): - """ overwrites GraphLearner.graph function """ - return stopper.NStages(300,1) - - return G() - - -import unittest - -class TestMLP(unittest.TestCase): - def blah(self, g): - training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2)}) - - learn_algo = GraphLearner(g) - - model1 = learn_algo(training_set1) - - model2 = learn_algo(training_set2) - - omatch = [o1 == o2 for o1, o2 in zip(model1(test_data), - model2(test_data))] - - n_match = sum(omatch) - - self.failUnless(n_match == (numpy.sum(training_set1.fields()['target'] == - training_set2.fields()['target'])), omatch) - - model1.save('/tmp/model1') - - #denoising_aa = GraphLearner(denoising_g) - #model1 = denoising_aa(trainset) - #hidset = model(trainset, fieldnames=['hidden']) - #model2 = denoising_aa(hidset) - - #f = open('blah', 'w') - #for m in model: - # m.save(f) - #filetensor.write(f, initial_classification_weights) - #f.flush() - - #deep_sigmoid_net = GraphLearner(deepnetwork_g) - #deep_model = deep_sigmoid_net.load('blah') - #deep_model.update(trainset) #do some fine tuning - - model1_dup = learn_algo('/tmp/model1') - - - def equiv(self, g0, g1): - training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - learn_algo_0 = GraphLearner(g0) - learn_algo_1 = GraphLearner(g1) - - model_0 = learn_algo_0(training_set1) - model_1 = learn_algo_1(training_set1) - - print '----' - for p in zip(model_0.params, model_1.params): - abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1]) - max_abs_rel_err = numpy.max(abs_rel_err) - if max_abs_rel_err > 1.0e-7: - print 'p0', p[0] - print 'p1', p[1] - #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err) - - - def test0(self): self.blah(graphMLP(2, 10, 2, .1)) - def test1(self): self.blah(graphMLP(2, 3, 2, .1)) - -if __name__ == '__main__': - unittest.main() - - diff -r 27b1344a57b1 -r 8fff4bc26f4c nnet_ops.py --- a/nnet_ops.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,721 +0,0 @@ - -import sys -sys.stderr.write("Use theano.tensor.nnet instead of pylearn.nnet_ops.\n") -if 0: - ## This file contain ops that are not currently integrated in the core of threano. - ## Not all of those ops have been thoroughly tested. - - import theano - from theano import tensor, scalar - import numpy - - ############ - # - # SCALAR OPS - # - - class ScalarSigmoid(scalar.UnaryScalarOp): - @staticmethod - def st_impl(x): - if x < -30.0: - return 0.0 - if x > 30.0: - return 1.0 - return 1.0 / (1.0 + numpy.exp(-x)) - def impl(self, x): - return ScalarSigmoid.st_impl(x) - def grad(self, (x,), (gz,)): - y = scalar_sigmoid(x) - return [gz * y * (1.0 - y)] - def c_code(self, node, name, (x,), (z,), sub): - if node.inputs[0].type in [scalar.float32, scalar.float64]: - return """%(z)s = - %(x)s < -30.0 - ? 
0.0 - : %(x)s > 30.0 - ? 1.0 - : 1.0 /(1.0+exp(-%(x)s));""" % locals() - raise NotImplementedError('only floatingpoint is implemented') - scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid') - sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid') - - class ScalarSoftplus(scalar.UnaryScalarOp): - @staticmethod - def static_impl(x): - if x < -30.0: - return 0.0 - if x > 30.0: - return x - return numpy.log1p(numpy.exp(x)) - def impl(self, x): - return ScalarSoftplus.static_impl(x) - def grad(self, (x,), (gz,)): - return [gz * scalar_sigmoid(x)] - def c_code(self, node, name, (x,), (z,), sub): - if node.inputs[0].type in [scalar.float32, scalar.float64]: - return """%(z)s = - %(x)s < -30.0 - ? 0.0 - : %(x)s > 30.0 - ? %(x)s - : log1p(exp(%(x)s));""" % locals() - raise NotImplementedError('only floating point x is implemented') - scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus') - softplus = tensor.Elemwise(scalar_softplus, name='softplus') - - - ############ - # - # TENSOR OPS - # - - - class SoftmaxWithBias(theano.Op): - """ - An L{Op} for the output of neural-net multiclass classifiers. - - @type x: is a matrix of floats (32 or 64) - @type b: is a [row] vector of floats (32 or 64), length is number of cols in x - - This L{Op}'s output is softmax(x+b). - softmax(x[i]) is the i'th distribution over len(x[i]) options. - """ - - nin = 2 - nout = 1 - def __init__(self, **kwargs): - theano.Op.__init__(self, **kwargs) - - def make_node(self, x, b): - x = tensor.as_tensor(x) - b = tensor.as_tensor(b) - if x.type.ndim != 2 \ - or x.type.dtype not in ['float32', 'float64']: - raise ValueError('x must be 2-d tensor of floats') - if b.type.ndim != 1 \ - or x.type.dtype not in ['float32', 'float64']: - raise ValueError('b must be 1-d tensor of floats') - - sm = x.type.make_result() - return theano.Apply(self, [x, b], [sm]) - - def perform(self, node, input_storage, output_storage): - x, b = input_storage - if b.shape[0] != x.shape[1]: - raise ValueError('b must have same number of columns as x') - - sm = numpy.zeros_like(x) - for i in xrange(sm.shape[0]): - row = x[i] + b - sm[i] = numpy.exp(row - numpy.max(row)) - sm[i] *= 1.0 / numpy.sum(sm[i]) - output_storage[0][0] = sm - - def grad(self, (x, b), (g_sm,)): - sm = softmax_with_bias(x, b) - dx = SoftmaxWithBiasDx()(g_sm, sm) - db = tensor.sum(dx, axis = 0) - return dx, db - - def c_headers(self): - return [''] - - @staticmethod - def c_code_template(): - # this implementation was lifted from - # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx - - #TODO: put this into a templated function, in the support code - #TODO: declare the max of each row as an Op output - - #TODO: set error messages for failures in this code - - #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] - init_decl = """ - npy_intp* Nx = %(x)s->dimensions; - - if (%(x)s->nd != 2) - { - PyErr_SetString(PyExc_ValueError, "a not 2d tensor"); - %(fail)s; - } - if (%(b)s->nd != 1) - { - PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); - %(fail)s; - } - if (%(x)s->descr->type_num != PyArray_DOUBLE) - { - PyErr_SetString(PyExc_TypeError, "a not float64"); - %(fail)s; - } - if (%(b)s->descr->type_num != PyArray_DOUBLE) - { - PyErr_SetString(PyExc_TypeError, "b not float64"); - %(fail)s; - } - if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])) - { - PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); - %(fail)s; - } - - if ((NULL == %(sm)s) - || (%(sm)s->dimensions[0] != 
%(x)s->dimensions[0]) - || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) - { - if (NULL != %(sm)s) Py_XDECREF(%(sm)s); - %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); - if(!%(sm)s) { - PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); - %(fail)s - } - } - """ - - begin_row_loop = """ - for (size_t i = 0; i < Nx[0]; ++i) - { - size_t j; - double sum = 0.0; - bool discount_max = false; - - const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); - const double* __restrict__ b_i = (double*)(%(b)s->data); - double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); - """ - - inside_row_loop = """ - npy_intp Sx = %(x)s->strides[1]/sizeof(double); - npy_intp Sb = %(b)s->strides[0]/sizeof(double); - npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); - - size_t row_max_j=0; - double row_max = x_i[0] + b_i[0]; - // Get the maximum value of the row - for (j = 0; j < Nx[1]; ++j) - { - double row_ij = x_i[j * Sx] + b_i[j * Sb]; - row_max_j = (row_ij > row_max) ? j : row_max_j; - row_max = (row_ij > row_max) ? row_ij : row_max; - } - - for (j = 0; j < Nx[1]; ++j) - { - double row_ij = x_i[j * Sx] + b_i[j * Sb]; - double sm_ij = exp(row_ij - row_max); - sum += sm_ij; - sm_i[j * Ssm] = sm_ij; - } - if ( (0.0 == sum) || (isinf(sum))) - { - //that was our best... - %(fail)s; - } - - //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); - double sum_inv = 1.0 / sum; - for (j = 0; j < Nx[1]; ++j) - { - sm_i[j * Ssm] *= sum_inv; - } - - """ - - end_row_loop = """ - } - """ - - return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) - - - def c_code(self, node, name, (x, b), (sm,), sub): - code_template = ''.join(self.c_code_template()) - return code_template % dict(locals(), **sub) - - softmax_with_bias = SoftmaxWithBias() - - - class SoftmaxWithBiasDx(theano.Op): - nin = 2 - nout = 1 - """Gradient wrt x of the SoftmaxWithBias Op""" - - def __init__(self, **kwargs): - theano.Op.__init__(self, **kwargs) - - def make_node(self, dy, sm, **kwargs): - dy = tensor.as_tensor(dy) - sm = tensor.as_tensor(sm) - return theano.Apply(self, [dy, sm], [sm.type.make_result()]) - - def perform(self, node, input_storage, output_storage): - dy, sm = input_storage - dx = numpy.zeros_like(sm) - #dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j] - for i in xrange(sm.shape[0]): - dy_times_sm_i = dy[i] * sm[i] - dx[i] = dy_times_sm_i - sum(dy_times_sm_i) * sm[i] - output_storage[0][0] = dx - - def grad(self, *args): - raise NotImplementedError() - - def c_code(self, node, name, (dy, sm), (dx,), sub): - return ''' - if ((%(dy)s->descr->type_num != PyArray_DOUBLE) - || (%(sm)s->descr->type_num != PyArray_DOUBLE)) - { - PyErr_SetString(PyExc_TypeError, "types should be float64, float64"); - %(fail)s; - } - if ((%(dy)s->nd != 2) - || (%(sm)s->nd != 2)) - { - PyErr_SetString(PyExc_ValueError, "rank error"); - %(fail)s; - } - if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0]) - { - PyErr_SetString(PyExc_ValueError, "dimension mismatch"); - %(fail)s; - } - if ((NULL == %(dx)s) - || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0]) - || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1])) - { - Py_XDECREF(%(dx)s); - %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s), - type_num_%(sm)s); - if (!%(dx)s) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output"); - %(fail)s; - } - } - - for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) - { - const double* __restrict__ dy_i = (double*) (%(dy)s->data + 
%(dy)s->strides[0] * i); - npy_intp Sdy = %(dy)s->strides[1]/sizeof(double); - const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i); - npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); - double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i); - npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); - - double sum_dy_times_sm = 0.; - for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) - { - dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm]; - sum_dy_times_sm += dx_i[j * Sdx]; - } - for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) - { - dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm]; - } - } - ''' % dict(locals(), **sub) - - def softmax(x, **kwargs): - b = tensor.zeros_like(x[0,:]) - return softmax_with_bias(x, b, **kwargs) - - - class CrossentropySoftmaxArgmax1HotWithBias(theano.Op): - """A special compound L{Op} for the output of neural-net classifiers. - - @type x: is a matrix of floats (32 or 64) - @type b: is a [row] vector of floats (32 or 64), length is number of cols in x - @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x - - @precondition: every entry in y_idx is a valid (non-negative) column index into x - - This L{Op} has three outputs: - - KL(softmax(x+b), y) - - softmax(x+b) - - argmax(x+b) - - softmax(x[i]) is the i'th distribution over len(x[i]) options - argmax(x) is the index of x's greatest element - y_idx[i] is an integer index, encoding a 1-hot distribution. - - In practice, when we're trying to do classification, we have one row in x - and y_idx per example, and y[i] is the index of the (correct) class of the - i'th example. - - """ - nin=3 - nout=3 - def __init__(self, **kwargs): - theano.Op.__init__(self, **kwargs) - - def make_node(self, x, b, y_idx): - x = tensor.as_tensor(x) - b = tensor.as_tensor(b) - y_idx = tensor.as_tensor(y_idx) - if x.type.ndim != 2 \ - or x.type.dtype not in ['float32', 'float64']: - raise ValueError('x must be 2-d tensor of floats') - if b.type.ndim != 1 \ - or x.type.dtype not in ['float32', 'float64']: - raise ValueError('b must be 1-d tensor of floats') - if y_idx.type.ndim != 1 \ - or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']: - raise ValueError('y_idx must be 1-d tensor of ints') - - # TODO: Is this correct? It used to be y, not y_idx - nll = tensor.Tensor(x.type.dtype, - y_idx.type.broadcastable).make_result() - # nll = Tensor(x.dtype, y.broadcastable) - sm = x.type.make_result() - am = y_idx.type.make_result() - return theano.Apply(self, [x, b, y_idx], [nll, sm, am]) - def perform(self, node, input_storage, output_storage): - """ - The math, where x is an input vector, and t is a target index: - - softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j])) - nll(x,t) = -log(softmax(x)[t]) - - We compute this by subtracting off the max of x. This avoids numerical instability. 
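(Illustrative aside, not part of the original Op's docstring: the derivation spelled out below, written in plain NumPy for a single row x and target index t.)

    import numpy
    x = numpy.array([1.0, 2.0, 3.0])
    t = 2
    m = x.max()
    e = numpy.exp(x - m)
    sm = e / e.sum()                          # stabilized softmax(x)
    nll = -x[t] + m + numpy.log(e.sum())      # equals -log(sm[t])
    assert numpy.allclose(nll, -numpy.log(sm[t]))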
- - m = max_j x[j] - softmax(x)[i] = exp(x[i] -m) / sum_j(exp(x[j] - m)) - - nll = -log(exp(x[t] -m) / sum_j(exp(x[j] - m))) - = -x[t] + m + log( sum_j(exp(x[j] - m))) - - """ - x, b, y_idx = input_storage - if b.shape[0] != x.shape[1]: - raise ValueError('b must have same number of columns as x') - if y_idx.shape[0] != x.shape[0]: - raise ValueError('y_idx must have same number of rows as x') - - sm = numpy.zeros_like(x) # softmax - nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) - am = numpy.zeros_like(y_idx) - for i in xrange(sm.shape[0]): - #add the bias vector to the i'th row of x - row = x[i] + b - - #get the maximum value of i'th row for numerically safe softmax / nll - am[i] = numpy.argmax(row) - m = row[am[i]] - - #compute the unnormalized softmax, and normalization constant - sm[i] = numpy.exp(row - m) - sum_j = numpy.sum(sm[i]) # sum_j(exp(x[j] - m)) - - #normalized our softmax - sm[i] *= 1.0 / sum_j - - # store the nll - nll[i] = -row[y_idx[i]] + m + numpy.log(sum_j) - - output_storage[0][0] = nll - output_storage[1][0] = sm - output_storage[2][0] = am - def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)): - if g_sm is not None or g_am is not None: - raise NotImplementedError() - nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx) - dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx) - db = tensor.sum(dx, axis = [0]) - return dx, db, None - - def c_headers(self): return [''] - - @staticmethod - def c_code_template(): - # this implementation was lifted from - # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx - - #TODO: put this into a templated function, in the support code - #TODO: declare the max of each row as an Op output - - #TODO: set error messages for failures in this code - - #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] - (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \ - SoftmaxWithBias.c_code_template() - return (init_decl, - """ - if (%(y_idx)s->nd != 1) - { - PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); - %(fail)s; - } - if ((%(y_idx)s->descr->type_num != PyArray_INT64) - && (%(y_idx)s->descr->type_num != PyArray_INT32) - && (%(y_idx)s->descr->type_num != PyArray_INT16) - && (%(y_idx)s->descr->type_num != PyArray_INT8)) - { - PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); - %(fail)s; - } - if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0]) - { - PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); - %(fail)s; - } - - if ((NULL == %(nll)s) //initial condition - || (%(nll)s->dimensions[0] != %(y_idx)s->dimensions[0])) - { - if (NULL != %(nll)s) Py_XDECREF(%(nll)s); - %(nll)s = (PyArrayObject*)PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(x)s); - if(!%(nll)s) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output"); - %(fail)s; - } - } - if ((NULL == %(am)s) - || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0])) - { - Py_XDECREF(%(am)s); - %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s); - if(!%(am)s) - { - PyErr_SetString(PyExc_MemoryError, "failed to alloc am output"); - %(fail)s; - } - } - """, - begin_row_loop, - """ - const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; - double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); - %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i); - """, - inside_row_loop, - """ - nll_i[0] = - x_i[y_i*Sx] - - b_i[y_i*Sb] - + row_max - + 
log(sum); - am_i[0] = row_max_j; - """, - end_row_loop) - - - def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub): - y_idx_type = node.inputs[2].type.dtype_specs()[1] - am_type = y_idx_type - code_template = ''.join(self.c_code_template()) - return code_template % dict(locals(), **sub) - - class CrossentropySoftmax1HotWithBiasDx (theano.Op): - nin=3 - nout=1 - """Gradient wrt x of the CrossentropySoftmax1Hot Op""" - def __init__(self, **kwargs): - theano.Op.__init__(self,**kwargs) - def make_node(self, dy, sm, y_idx,**kwargs): - dy = tensor.as_tensor(dy) - sm = tensor.as_tensor(sm) - y_idx = tensor.as_tensor(y_idx) - return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()]) - def perform(self, node, input_storage, output_storage): - dy,sm,y_idx = input_storage - dx = numpy.zeros_like(sm) - for i in xrange(sm.shape[0]): - dx[i] = dy[i] * sm[i] #vector scale - dx[i, y_idx[i]] -= dy[i] #scalar decrement - output_storage[0][0] = dx - def grad(self, *args): - raise NotImplementedError() - def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): - y_idx_type = node.inputs[2].type.dtype_specs()[1] - return """ - - if ((%(dnll)s->descr->type_num != PyArray_DOUBLE) - || (%(sm)s->descr->type_num != PyArray_DOUBLE) - ) - { - PyErr_SetString(PyExc_TypeError, "types should be float64, float64, int64"); - %(fail)s; - } - if ((%(y_idx)s->descr->type_num != PyArray_INT64) - && (%(y_idx)s->descr->type_num != PyArray_INT32) - && (%(y_idx)s->descr->type_num != PyArray_INT16) - && (%(y_idx)s->descr->type_num != PyArray_INT8)) - { - PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); - %(fail)s; - } - if ((%(dnll)s->nd != 1) - || (%(sm)s->nd != 2) - || (%(y_idx)s->nd != 1)) - { - PyErr_SetString(PyExc_ValueError, "rank error"); - %(fail)s; - } - if ((%(dnll)s->dimensions[0] != %(sm)s->dimensions[0]) - || (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0])) - { - PyErr_SetString(PyExc_ValueError, "dimension mismatch"); - %(fail)s; - } - if ((NULL == %(dx)s) - || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0]) - || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1])) - { - if (NULL != %(dx)s) Py_XDECREF(%(dx)s); - %(dx)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s), type_num_%(sm)s); - if(!%(dx)s) { - PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output"); - %(fail)s - } - } - - for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) - { - const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0]; - - const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; - - const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); - npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); - - double* __restrict__ dx_i = (double*)(%(dx)s->data + %(dx)s->strides[0] * i); - npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); - - for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) - { - dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm]; - } - if (y_i >= %(dx)s->dimensions[1]) - { - %(fail)s; - } - dx_i[y_i * Sdx] -= dnll_i; - } - """ % dict(locals(), **sub) - - crossentropy_softmax_argmax_1hot_with_bias = \ - CrossentropySoftmaxArgmax1HotWithBias() - - def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs): - return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2] - - def crossentropy_softmax_1hot(x, y_idx, **kwargs): - b = tensor.zeros_like(x[0,:]) - return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs) - - - class MultinomialCrossentropy1Hot(theano.Op): 
- pass - - - def binary_crossentropy(output, target): - """ - Compute the crossentropy of binary output wrt binary target. - @note: We do not sum, crossentropy is computed by component. - @todo: Rewrite as a scalar, and then broadcast to tensor. - @todo: This is essentially duplicated as cost.cross_entropy - @warning: OUTPUT and TARGET are reversed in cost.cross_entropy - """ - return -(target * tensor.log(output) + (1 - target) * tensor.log(1 - output)) - - - - class Prepend_scalar_constant_to_each_row(theano.Op): - def __init__(self, val = 0): - if isinstance(val, float): - val = scalar.constant(val) - self.val = val - - def make_node(self, mat): - #check type of input - if not isinstance(mat,theano.Result) or not mat.type==tensor.matrix().type: - raise TypeError("Expected a matrix as input") - x = tensor.as_tensor(mat) - y = tensor.as_tensor(self.val) - if x.type.dtype != y.type.dtype: - TypeError("the value to prepend don't have the same type as the matrix") - - node = theano.Apply(op=self, inputs=[mat], outputs=[tensor.matrix()]) - return node - - def perform(self, node, (mat, ), (output, )): - new_shape=(mat.shape[0],mat.shape[1]+1) - if output[0] == None: - output[0]=numpy.empty(new_shape,dtype=mat.dtype) - out=output[0] - else: - if output[0].shape!=new_shape: - try: - output[0].resize(new_shape) - except: - output[0]=numpy.empty(new_shape, dtype=mat.dtype) - out=output[0] - - out[:,0].fill(self.val.data) - out[:,1:]=mat - - def grad(self, (mat,), (goutput,)): - return goutput[:,1:] - - class Prepend_scalar_to_each_row(theano.Op): - def make_node(self, val, mat): - #check type of input - if isinstance(val, float): - val = scalar.constant(val) - if not isinstance(mat,theano.Result) or not mat.type==tensor.matrix().type: - raise TypeError("Expected a matrix as input") - x = tensor.as_tensor(mat) - y = tensor.as_tensor(val) - if x.type.dtype != y.type.dtype: - TypeError("the value to prepend don't have the same type as the matrix") - - node = theano.Apply(op=self, inputs=[val,mat], outputs=[tensor.matrix()]) - return node - - def perform(self, node, (val,mat), (output, )): - new_shape=(mat.shape[0],mat.shape[1]+1) - if output[0] == None: - output[0]=numpy.empty(new_shape,dtype=mat.dtype) - out=output[0] - else: - if output[0].shape!=new_shape: - try: - output[0].resize(new_shape) - except: - output[0]=numpy.empty(new_shape, dtype=mat.dtype) - out=output[0] - out[:,0].fill(val) - out[:,1:]=mat - - def grad(self, (val, mat), (goutput,)): - return goutput[:,0], goutput[:,1:] - - prepend_scalar_to_each_row = Prepend_scalar_to_each_row() - prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.) - prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.) - - class solve(theano.Op): - """ - Find the solution to the linear equation Ax=b, - where A is a 2d matrix and b is a 1d or 2d matrix. - It use numpy.solve to find the solution. 
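(Aside, not part of the original op: the standard NumPy routine for this is numpy.linalg.solve; a minimal sketch of the computation perform() is meant to carry out.)

    import numpy
    A = numpy.array([[3.0, 1.0],
                     [1.0, 2.0]])
    b = numpy.array([9.0, 8.0])
    x = numpy.linalg.solve(A, b)              # solves A x = b
    assert numpy.allclose(numpy.dot(A, x), b)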
- """ - - def make_node(self, A, b): - if not isinstance(A, theano.Result) or not A.type==tensor.matrix().type: - raise TypeError("We expected that A had a matrix type") - if not isinstance(B, theano.Result) or not B.type==tensor.matrix().type: - raise TypeError("We expected that B had a matrix type") - - node = theano.Apply(op=self, inputs=[A, B], outputs=[tensor.matrix()]) - return node - - def perform(self, node, (A, B), (output, )): - ret=numpy.solve(A,B) - output[0]=ret - - def grad(self, (theta, A, B), (gtheta,)): - raise NotImplementedError() - - diff -r 27b1344a57b1 -r 8fff4bc26f4c noise.py --- a/noise.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -def binomial(input, rstate, p = 0.75): - """ - Op to corrupt an input with binomial noise. - Generate a noise vector of 1's and 0's (1 with probability p). - We multiply this by the input. - - @note: See U{ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa} - to see how rstate is used. - """ - noise = rstate.gen_like(('binomial',{'p': p, 'n': 1}), input) - noise.name = 'noise' - return noise * input - diff -r 27b1344a57b1 -r 8fff4bc26f4c onehotop.py --- a/onehotop.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -""" -One hot Op -""" - -#from theano import tensor -from theano.tensor import as_tensor, Tensor -from theano.gof import op -from theano.gof.graph import Apply - -import numpy - -class OneHot(op.Op): - """ - Construct a one-hot vector, x out of y. - - @todo: Document inputs and outputs - @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! - @todo: Use 'bool' as output dtype, not 'int64' ? - @todo: Allow this to operate on column vectors (Tensor) - @todo: Describe better. - """ - - def make_node(self, x, y): - """ - @type x: Vector L{Tensor} of integers - @param x: The entries of the one-hot vector to be one. - @type y: Integer scalar L{Tensor} - @param y: The length (#columns) of the one-hot vectors. - @return: A L{Tensor} of one-hot vectors - - @precondition: x < y for all entries of x - @todo: Check that x and y are int types - """ - x = as_tensor(x) - y = as_tensor(y) - #assert x.dtype[0:3] == "int" - #assert y.dtype[0:3] == "int" - inputs = [x, y] - ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] - #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] - #outputs = [Tensor("int64", broadcastable=[False, False])] - outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] - node = Apply(op = self, inputs = inputs, outputs = outputs) - return node - - def perform(self, node, (x, y), (out, )): - assert x.dtype == "int64" or x.dtype == "int32" - assert x.ndim == 1 - assert y.dtype == "int64" or x.dtype == "int32" - assert y.ndim == 0 - out[0] = numpy.zeros((x.shape[0], y), dtype="float64") - for c in range(x.shape[0]): - assert x[c] < y - out[0][c, x[c]] = 1 - - def grad(self, (x, y), (out_gradient, )): - return None, None -one_hot = OneHot() diff -r 27b1344a57b1 -r 8fff4bc26f4c onehotop.py.scalar --- a/onehotop.py.scalar Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -""" -One hot Op -""" - -#from theano import tensor -from theano.tensor import as_tensor, Tensor -#from theano import scalar -from theano.scalar import as_scalar -from theano.gof import op -from theano.gof.graph import Apply - -import numpy - -class OneHot(op.Op): - """ - Construct a one-hot vector, x out of y. 
- - @todo: Document inputs and outputs - @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! - @todo: Use 'bool' as output dtype, not 'int64' ? - @todo: Allow this to operate on column vectors (Tensor) - @todo: Describe better. - @todo: What type is y? - @todo: What about operating on L{Scalar}s? - """ - - def make_node(self, x, y): - """ - @type x: Vector L{Tensor} of integers - @param x: The entries of the one-hot vector to be one. - @type y: Integer L{Scalar} - @param y: The length (#columns) of the one-hot vectors. - @return: A L{Tensor} of one-hot vectors - - @precondition: x < y for all entries of x - @todo: Check that x and y are int types - """ - #x = tensor.as_tensor(x) - #y = scalar.as_scalar(y) - x = as_tensor(x) - y = as_scalar(y) - #assert x.dtype[0:3] == "int" - #assert y.dtype[0:3] == "int" - inputs = [x, y] - ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] - #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] - #outputs = [Tensor("int64", broadcastable=[False, False])] - outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] - node = Apply(op = self, inputs = inputs, outputs = outputs) - return node - - def perform(self, node, (x, y), (out, )): - assert x.dtype == "int64" - assert type(y) == numpy.int64 - assert x.ndim == 1 - #out = numpy.zeros((x.shape[0], y), dtype="int64") - out[0] = numpy.zeros((x.shape[0], y), dtype="float64") - for c in range(x.shape[0]): - assert x[c] < y - out[0][c, x[c]] = 1 - - def grad(self, (x, y), (out_gradient, )): - return None, None -one_hot = OneHot() diff -r 27b1344a57b1 -r 8fff4bc26f4c pmat.py --- a/pmat.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,526 +0,0 @@ -## Automatically adapted for numpy.numarray Jun 13, 2007 by python_numarray_to_numpy (-xsm) - -# PMat.py -# Copyright (C) 2005 Pascal Vincent -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. The name of the authors may not be used to endorse or promote -# products derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -# NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# This file is part of the PLearn library. 
For more information on the PLearn -# library, go to the PLearn Web site at www.plearn.org - - -# Author: Pascal Vincent - -#import numarray, sys, os, os.path -import numpy.numarray, sys, os, os.path -import fpconst - -def array_columns( a, cols ): - indices = None - if isinstance( cols, int ): - indices = [ cols ] - elif isinstance( cols, slice ): - #print cols - indices = range( *cols.indices(cols.stop) ) - else: - indices = list( cols ) - - return numpy.numarray.take(a, indices, axis=1) - -def load_pmat_as_array(fname): - s = file(fname,'rb').read() - formatstr = s[0:64] - datastr = s[64:] - structuretype, l, w, data_type, endianness = formatstr.split() - - if data_type=='DOUBLE': - elemtype = 'd' - elif data_type=='FLOAT': - elemtype = 'f' - else: - raise ValueError('Invalid data type in file header: '+data_type) - - if endianness=='LITTLE_ENDIAN': - byteorder = 'little' - elif endianness=='BIG_ENDIAN': - byteorder = 'big' - else: - raise ValueError('Invalid endianness in file header: '+endianness) - - l = int(l) - w = int(w) - X = numpy.numarray.fromstring(datastr,elemtype, shape=(l,w) ) - if byteorder!=sys.byteorder: - X.byteswap(True) - return X - -def load_pmat_as_array_dataset(fname): - import dataset,lookup_list - - #load the pmat as array - a=load_pmat_as_array(fname) - - #load the fieldnames - fieldnames = [] - fieldnamefile = os.path.join(fname+'.metadata','fieldnames') - if os.path.isfile(fieldnamefile): - f = open(fieldnamefile) - for row in f: - row = row.split() - if len(row)>0: - fieldnames.append(row[0]) - f.close() - else: - self.fieldnames = [ "field_"+str(i) for i in range(a.shape[1]) ] - - return dataset.ArrayDataSet(a,lookup_list.LookupList(fieldnames,[x for x in range(a.shape[1])])) - -def load_amat_as_array_dataset(fname): - import dataset,lookup_list - - #load the amat as array - (a,fieldnames)=readAMat(fname) - - #load the fieldnames - if len(fieldnames)==0: - self.fieldnames = [ "field_"+str(i) for i in range(a.shape[1]) ] - - return dataset.ArrayDataSet(a,lookup_list.LookupList(fieldnames,[x for x in range(a.shape[1])])) - -def save_array_dataset_as_pmat(fname,ds): - ar=ds.data - save_array_as_pmat(fname,ar,ds.fieldNames()) - -def save_array_as_pmat( fname, ar, fieldnames=[] ): - s = file(fname,'wb') - - length, width = ar.shape - if fieldnames: - assert len(fieldnames) == width - metadatadir = fname+'.metadata' - if not os.path.isdir(metadatadir): - os.mkdir(metadatadir) - fieldnamefile = os.path.join(metadatadir,'fieldnames') - f = open(fieldnamefile,'wb') - for name in fieldnames: - f.write(name+'\t0\n') - f.close() - - header = 'MATRIX ' + str(length) + ' ' + str(width) + ' ' - if ar.dtype.char=='d': - header += 'DOUBLE ' - elemsize = 8 - - elif ar.dtype.char=='f': - header += 'FLOAT ' - elemsize = 4 - - else: - raise TypeError('Unsupported typecode: %s' % ar.dtype.char) - - rowsize = elemsize*width - - if sys.byteorder=='little': - header += 'LITTLE_ENDIAN ' - elif sys.byteorder=='big': - header += 'BIG_ENDIAN ' - else: - raise TypeError('Unsupported sys.byteorder: '+repr(sys.byteorder)) - - header += ' '*(63-len(header))+'\n' - s.write( header ) - s.write( ar.tostring() ) - s.close() - - -####### Iterators ########################################################### - -class VMatIt: - def __init__(self, vmat): - self.vmat = vmat - self.cur_row = 0 - - def __iter__(self): - return self - - def next(self): - if self.cur_row==self.vmat.length: - raise StopIteration - row = self.vmat.getRow(self.cur_row) - self.cur_row += 1 - return row - -class ColumnIt: - 
def __init__(self, vmat, col): - self.vmat = vmat - self.col = col - self.cur_row = 0 - - def __iter__(self): - return self - - def next(self): - if self.cur_row==self.vmat.length: - raise StopIteration - val = self.vmat[self.cur_row, self.col] - self.cur_row += 1 - return val - -####### VMat classes ######################################################## - -class VMat: - def __iter__(self): - return VMatIt(self) - - def __getitem__( self, key ): - if isinstance( key, slice ): - start, stop, step = key.start, key.stop, key.step - if step!=None: - raise IndexError('Extended slice with step not currently supported') - - if start is None: - start = 0 - - l = self.length - if stop is None or stop > l: - stop = l - - return self.getRows(start,stop-start) - - elif isinstance( key, tuple ): - # Basically returns a SubVMatrix - assert len(key) == 2 - rows = self.__getitem__( key[0] ) - - shape = rows.shape - if len(shape) == 1: - return rows[ key[1] ] - - cols = key[1] - if isinstance(cols, slice): - start, stop, step = cols.start, cols.stop, cols.step - if start is None: - start = 0 - - if stop is None: - stop = self.width - elif stop < 0: - stop = self.width+stop - - cols = slice(start, stop, step) - - return array_columns(rows, cols) - - elif isinstance( key, str ): - # The key is considered to be a fieldname and a column is - # returned. - try: - return array_columns( self.getRows(0,self.length), - self.fieldnames.index(key) ) - except ValueError: - print >>sys.stderr, "Key is '%s' while fieldnames are:" % key - print >>sys.stderr, self.fieldnames - raise - - else: - if key<0: key+=self.length - return self.getRow(key) - - def getFieldIndex(self, fieldname): - try: - return self.fieldnames.index(fieldname) - except ValueError: - raise ValueError( "VMat has no field named %s. 
Field names: %s" - %(fieldname, ','.join(self.fieldnames)) ) - -class PMat( VMat ): - - def __init__(self, fname, openmode='r', fieldnames=[], elemtype='d', - inputsize=-1, targetsize=-1, weightsize=-1, array = None): - self.fname = fname - self.inputsize = inputsize - self.targetsize = targetsize - self.weightsize = weightsize - if openmode=='r': - self.f = open(fname,'rb') - self.read_and_parse_header() - self.load_fieldnames() - - elif openmode=='w': - self.f = open(fname,'w+b') - self.fieldnames = fieldnames - self.save_fieldnames() - self.length = 0 - self.width = len(fieldnames) - self.elemtype = elemtype - self.swap_bytes = False - self.write_header() - - elif openmode=='a': - self.f = open(fname,'r+b') - self.read_and_parse_header() - self.load_fieldnames() - - else: - raise ValueError("Currently only supported openmodes are 'r', 'w' and 'a': "+repr(openmode)+" is not supported") - - if array is not None: - shape = array.shape - if len(shape) == 1: - row_format = lambda r: [ r ] - elif len(shape) == 2: - row_format = lambda r: r - - for row in array: - self.appendRow( row_format(row) ) - - def __del__(self): - self.close() - - def write_header(self): - header = 'MATRIX ' + str(self.length) + ' ' + str(self.width) + ' ' - - if self.elemtype=='d': - header += 'DOUBLE ' - self.elemsize = 8 - elif self.elemtype=='f': - header += 'FLOAT ' - self.elemsize = 4 - else: - raise TypeError('Unsupported elemtype: '+repr(elemtype)) - self.rowsize = self.elemsize*self.width - - if sys.byteorder=='little': - header += 'LITTLE_ENDIAN ' - elif sys.byteorder=='big': - header += 'BIG_ENDIAN ' - else: - raise TypeError('Unsupported sys.byteorder: '+repr(sys.byteorder)) - - header += ' '*(63-len(header))+'\n' - - self.f.seek(0) - self.f.write(header) - - def read_and_parse_header(self): - header = self.f.read(64) - mat_type, l, w, data_type, endianness = header.split() - if mat_type!='MATRIX': - raise ValueError('Invalid file header (should start with MATRIX)') - self.length = int(l) - self.width = int(w) - if endianness=='LITTLE_ENDIAN': - byteorder = 'little' - elif endianness=='BIG_ENDIAN': - byteorder = 'big' - else: - raise ValueError('Invalid endianness in file header: '+endianness) - self.swap_bytes = (byteorder!=sys.byteorder) - - if data_type=='DOUBLE': - self.elemtype = 'd' - self.elemsize = 8 - elif data_type=='FLOAT': - self.elemtype = 'f' - self.elemsize = 4 - else: - raise ValueError('Invalid data type in file header: '+data_type) - self.rowsize = self.elemsize*self.width - - def load_fieldnames(self): - self.fieldnames = [] - fieldnamefile = os.path.join(self.fname+'.metadata','fieldnames') - if os.path.isfile(fieldnamefile): - f = open(fieldnamefile) - for row in f: - row = row.split() - if len(row)>0: - self.fieldnames.append(row[0]) - f.close() - else: - self.fieldnames = [ "field_"+str(i) for i in range(self.width) ] - - def save_fieldnames(self): - metadatadir = self.fname+'.metadata' - if not os.path.isdir(metadatadir): - os.mkdir(metadatadir) - fieldnamefile = os.path.join(metadatadir,'fieldnames') - f = open(fieldnamefile,'wb') - for name in self.fieldnames: - f.write(name+'\t0\n') - f.close() - - def getRow(self,i): - if i<0 or i>=self.length: - raise IndexError('PMat index out of range') - self.f.seek(64+i*self.rowsize) - data = self.f.read(self.rowsize) - ar = numpy.numarray.fromstring(data, self.elemtype, (self.width,)) - if self.swap_bytes: - ar.byteswap(True) - return ar - - def getRows(self,i,l): - if i<0 or l<0 or i+l>self.length: - raise IndexError('PMat index out of 
range') - self.f.seek(64+i*self.rowsize) - data = self.f.read(l*self.rowsize) - ar = numpy.numarray.fromstring(data, self.elemtype, (l,self.width)) - if self.swap_bytes: - ar.byteswap(True) - return ar - - def checkzerorow(self,i): - if i<0 or i>self.length: - raise IndexError('PMat index out of range') - self.f.seek(64+i*self.rowsize) - data = self.f.read(self.rowsize) - ar = numpy.numarray.fromstring(data, self.elemtype, (len(data)/self.elemsize,)) - if self.swap_bytes: - ar.byteswap(True) - for elem in ar: - if elem!=0: - return False - return True - - def putRow(self,i,row): - if i<0 or i>=self.length: - raise IndexError('PMat index out of range') - if len(row)!=self.width: - raise TypeError('length of row ('+str(len(row))+ ') differs from matrix width ('+str(self.width)+')') - if i<0 or i>=self.length: - raise IndexError - if self.swap_bytes: # must make a copy and swap bytes - ar = numpy.numarray.numarray(row,type=self.elemtype) - ar.byteswap(True) - else: # asarray makes a copy if not already a numarray of the right type - ar = numpy.numarray.asarray(row,type=self.elemtype) - self.f.seek(64+i*self.rowsize) - self.f.write(ar.tostring()) - - def appendRow(self,row): - if len(row)!=self.width: - raise TypeError('length of row ('+str(len(row))+ ') differs from matrix width ('+str(self.width)+')') - if self.swap_bytes: # must make a copy and swap bytes - ar = numpy.numarray.numarray(row,type=self.elemtype) - ar.byteswap(True) - else: # asarray makes a copy if not already a numarray of the right type - ar = numpy.numarray.asarray(row,type=self.elemtype) - - self.f.seek(64+self.length*self.rowsize) - self.f.write(ar.tostring()) - self.length += 1 - self.write_header() # update length in header - - def flush(self): - self.f.flush() - - def close(self): - if hasattr(self, 'f'): - self.f.close() - - def append(self,row): - self.appendRow(row) - - def __setitem__(self, i, row): - l = self.length - if i<0: i+=l - self.putRow(i,row) - - def __len__(self): - return self.length - - - -#copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py -def safefloat(str): - """Convert the given string to its float value. It is 'safe' in the sense - that missing values ('nan') will be properly converted to the corresponding - float value under all platforms, contrarily to 'float(str)'. - """ - if str.lower() == 'nan': - return fpconst.NaN - else: - return float(str) - -#copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py -def readAMat(amatname): - """Read a PLearn .amat file and return it as a numarray Array. - - Return a tuple, with as the first argument the array itself, and as - the second argument the fieldnames (list of strings). - """ - ### NOTE: this version is much faster than first creating the array and - ### updating each row as it is read... Bizarrely enough - f = open(amatname) - a = [] - fieldnames = [] - for line in f: - if line.startswith("#size:"): - (length,width) = line[6:].strip().split() - elif line.startswith("#sizes:"): # ignore input/target/weight/extra sizes - continue - - elif line.startswith("#:"): - fieldnames = line[2:].strip().split() - pass - elif not line.startswith('#'): - # Add all non-comment lines. 
- row = [ safefloat(x) for x in line.strip().split() ] - if row: - a.append(row) - - f.close() - return numpy.numarray.array(a), fieldnames - - -if __name__ == '__main__': - pmat = PMat( 'tmp.pmat', 'w', fieldnames=['F1', 'F2'] ) - pmat.append( [1, 2] ) - pmat.append( [3, 4] ) - pmat.close() - - pmat = PMat( 'tmp.pmat', 'r' ) - ar=load_pmat_as_array('tmp.pmat') - ds=load_pmat_as_array_dataset('tmp.pmat') - - print "PMat",pmat - print "PMat",pmat[:] - print "array",ar - print "ArrayDataSet",ds - for i in ds: - print i - save_array_dataset_as_pmat("tmp2.pmat",ds) - ds2=load_pmat_as_array_dataset('tmp2.pmat') - for i in ds2: - print i - # print "+++ tmp.pmat contains: " - # os.system( 'plearn vmat cat tmp.pmat' ) - import shutil - for fname in ["tmp.pmat", "tmp2.pmat"]: - os.remove( fname ) - if os.path.exists( fname+'.metadata' ): - shutil.rmtree( fname+'.metadata' ) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/__init__.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,6 @@ +#import exceptions + +def __src_version__(): + #todo - this is vulnerable to the bug in theano ticket #160 + return version.src_version(__name__) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/aa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/aa.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,108 @@ + +import theano +from theano import tensor as T +from theano.tensor import nnet as NN +import numpy as N + +class AutoEncoder(theano.Module): + + def __init__(self, input = None, regularize = True, tie_weights = True): + super(AutoEncoder, self).__init__() + + # MODEL CONFIGURATION + self.regularize = regularize + self.tie_weights = tie_weights + + # ACQUIRE/MAKE INPUT + if not input: + input = T.matrix('input') + self.input = theano.External(input) + + # HYPER-PARAMETERS + self.lr = theano.Member(T.scalar()) + + # PARAMETERS + self.w1 = theano.Member(T.matrix()) + if not tie_weights: + self.w2 = theano.Member(T.matrix()) + else: + self.w2 = self.w1.T + self.b1 = theano.Member(T.vector()) + self.b2 = theano.Member(T.vector()) + + # HIDDEN LAYER + self.hidden_activation = T.dot(input, self.w1) + self.b1 + self.hidden = self.build_hidden() + + # RECONSTRUCTION LAYER + self.output_activation = T.dot(self.hidden, self.w2) + self.b2 + self.output = self.build_output() + + # RECONSTRUCTION COST + self.reconstruction_cost = self.build_reconstruction_cost() + + # REGULARIZATION COST + self.regularization = self.build_regularization() + + # TOTAL COST + self.cost = self.reconstruction_cost + if self.regularize: + self.cost = self.cost + self.regularization + + # GRADIENTS AND UPDATES + if self.tie_weights: + self.params = self.w1, self.b1, self.b2 + else: + self.params = self.w1, self.w2, self.b1, self.b2 + gradients = T.grad(self.cost, self.params) + updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) + + # INTERFACE METHODS + self.update = theano.Method(input, self.cost, updates) + self.reconstruction = theano.Method(input, self.output) + self.representation = theano.Method(input, self.hidden) + + def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): + if (input_size is None) ^ (hidden_size is None): + raise ValueError("Must specify hidden_size and input_size or neither.") + super(AutoEncoder, self)._instance_initialize(obj, **init) + if seed is not None: + R = N.random.RandomState(seed) + else: + R 
= N.random + if input_size is not None: + sz = (input_size, hidden_size) + range = 1/N.sqrt(input_size) + obj.w1 = R.uniform(size = sz, low = -range, high = range) + if not self.tie_weights: + obj.w2 = R.uniform(size = list(reversed(sz)), low = -range, high = range) + obj.b1 = N.zeros(hidden_size) + obj.b2 = N.zeros(input_size) + + def build_regularization(self): + return T.zero() # no regularization! + + +class SigmoidXEAutoEncoder(AutoEncoder): + + def build_hidden(self): + return NN.sigmoid(self.hidden_activation) + + def build_output(self): + return NN.sigmoid(self.output_activation) + + def build_reconstruction_cost(self): + self.reconstruction_cost_matrix = self.input * T.log(self.output) + (1.0 - self.input) * T.log(1.0 - self.output) + self.reconstruction_costs = -T.sum(self.reconstruction_cost_matrix, axis=1) + return T.sum(self.reconstruction_costs) + + def build_regularization(self): + self.l2_coef = theano.Member(T.scalar()) + if self.tie_weights: + return self.l2_coef * T.sum(self.w1 * self.w1) + else: + return self.l2_coef * T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2) + + def _instance_initialize(self, obj, input_size = None, hidden_size = None, **init): + init.setdefault('l2_coef', 0) + super(SigmoidXEAutoEncoder, self)._instance_initialize(obj, input_size, hidden_size, **init) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/cost.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/cost.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,33 @@ +""" +Cost functions. + +@note: All of these functions return one cost per example. So it is your +job to perform a tensor.sum over the individual example losses. + +@todo: Make a Cost class, with a particular contract. + +@todo: It would be nice to implement a hinge loss, with a particular margin. +""" + +import theano.tensor as T +from theano.tensor.xlogx import xlogx + +def quadratic(target, output, axis=1): + return T.mean(T.sqr(target - output), axis=axis) + +def cross_entropy(target, output, mean_axis=0, sum_axis=1): + """ + @todo: This is essentially duplicated as nnet_ops.binary_crossentropy + @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy + """ + XE = target * T.log(output) + (1 - target) * T.log(1 - output) + return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis) + +def KL_divergence(target, output): + """ + @note: We do not compute the mean, because if target and output have + different shapes then the result will be garbled. + """ + return -(target * T.log(output) + (1 - target) * T.log(1 - output)) \ + + (xlogx(target) + xlogx(1 - target)) +# return cross_entropy(target, output, axis) - cross_entropy(target, target, axis) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/daa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/daa.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,188 @@ + +import theano +from theano import tensor as T +from theano.tensor import nnet as NN +from theano.tensor.deprecated import rmodule + +import numpy as N + +from pylearn.algorithms import cost + +class DenoisingAA(rmodule.RModule): + """De-noising Auto-encoder + + WRITEME + + Abstract base class. Requires subclass with functions: + + - build_corrupted_input() + + Introductory article about this model WRITEME. 
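(Illustrative sketch, not part of the original docstring: the kind of corruption build_corrupted_input() is expected to implement, here the binomial masking used by SigmoidXEDenoisingAA below, which zeroes each input entry with probability noise_level.)

    import numpy
    rng = numpy.random.RandomState(0)
    noise_level = 0.25
    x = rng.rand(4, 6)                                           # a small minibatch
    mask = rng.binomial(n=1, p=1.0 - noise_level, size=x.shape)  # 1 keeps, 0 drops
    corrupted = mask * x                                         # about 25% of entries forced to 0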
+ + + """ + + def __init__(self, input = None, regularize = True, tie_weights = True, + activation_function=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy): + """ + :param input: WRITEME + + :param regularize: WRITEME + + :param tie_weights: WRITEME + + :param activation_function: WRITEME + + :param reconstruction_cost: Should return one cost per example (row) + + :todo: Default noise level for all daa levels + + """ + super(DenoisingAA, self).__init__() + + # MODEL CONFIGURATION + self.regularize = regularize + self.tie_weights = tie_weights + self.activation_function = activation_function + self.reconstruction_cost_function = reconstruction_cost_function + + # ACQUIRE/MAKE INPUT + if not input: + input = T.matrix('input') + self.input = theano.External(input) + + # HYPER-PARAMETERS + self.lr = theano.Member(T.scalar()) + + # PARAMETERS + self.w1 = theano.Member(T.matrix()) + if not tie_weights: + self.w2 = theano.Member(T.matrix()) + else: + self.w2 = self.w1.T + self.b1 = theano.Member(T.vector()) + self.b2 = theano.Member(T.vector()) + + + # REGULARIZATION COST + self.regularization = self.build_regularization() + + + ### NOISELESS ### + + # HIDDEN LAYER + self.hidden_activation = T.dot(self.input, self.w1) + self.b1 + self.hidden = self.hid_activation_function(self.hidden_activation) + + # RECONSTRUCTION LAYER + self.output_activation = T.dot(self.hidden, self.w2) + self.b2 + self.output = self.out_activation_function(self.output_activation) + + # RECONSTRUCTION COST + self.reconstruction_costs = self.build_reconstruction_costs(self.output) + self.reconstruction_cost = T.mean(self.reconstruction_costs) + + # TOTAL COST + self.cost = self.reconstruction_cost + if self.regularize: + self.cost = self.cost + self.regularization + + + ### WITH NOISE ### + self.corrupted_input = self.build_corrupted_input() + + # HIDDEN LAYER + self.nhidden_activation = T.dot(self.corrupted_input, self.w1) + self.b1 + self.nhidden = self.hid_activation_function(self.nhidden_activation) + + # RECONSTRUCTION LAYER + self.noutput_activation = T.dot(self.nhidden, self.w2) + self.b2 + self.noutput = self.out_activation_function(self.noutput_activation) + + # RECONSTRUCTION COST + self.nreconstruction_costs = self.build_reconstruction_costs(self.noutput) + self.nreconstruction_cost = T.mean(self.nreconstruction_costs) + + # TOTAL COST + self.ncost = self.nreconstruction_cost + if self.regularize: + self.ncost = self.ncost + self.regularization + + + # GRADIENTS AND UPDATES + if self.tie_weights: + self.params = self.w1, self.b1, self.b2 + else: + self.params = self.w1, self.w2, self.b1, self.b2 + gradients = T.grad(self.ncost, self.params) + updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) + + # INTERFACE METHODS + self.update = theano.Method(self.input, self.ncost, updates) + self.compute_cost = theano.Method(self.input, self.cost) + self.noisify = theano.Method(self.input, self.corrupted_input) + self.reconstruction = theano.Method(self.input, self.output) + self.representation = theano.Method(self.input, self.hidden) + self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput]) + + self.validate = theano.Method(self.input, [self.cost, self.output]) + + def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): + if (input_size is None) ^ (hidden_size is None): + raise ValueError("Must specify input_size and hidden_size or neither.") + super(DenoisingAA, self)._instance_initialize(obj, **init) 
+ if seed is not None: + R = N.random.RandomState(seed) + else: + R = N.random + if input_size is not None: + sz = (input_size, hidden_size) + inf = 1/N.sqrt(input_size) + hif = 1/N.sqrt(hidden_size) + obj.w1 = R.uniform(size = sz, low = -inf, high = inf) + if not self.tie_weights: + obj.w2 = R.uniform(size = list(reversed(sz)), low = -hif, high = hif) + obj.b1 = N.zeros(hidden_size) + obj.b2 = N.zeros(input_size) + if seed is not None: + obj.seed(seed) + obj.__hide__ = ['params'] + + def build_regularization(self): + """ + @todo: Why do we need this function? + """ + return T.zero() # no regularization! + + +class SigmoidXEDenoisingAA(DenoisingAA): + """ + @todo: Merge this into the above. + @todo: Default noise level for all daa levels + """ + + def build_corrupted_input(self): + self.noise_level = theano.Member(T.scalar()) + return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input + + def hid_activation_function(self, activation): + return self.activation_function(activation) + + def out_activation_function(self, activation): + return self.activation_function(activation) + + def build_reconstruction_costs(self, output): + return self.reconstruction_cost_function(self.input, output) + + def build_regularization(self): + self.l2_coef = theano.Member(T.scalar()) + if self.tie_weights: + return self.l2_coef * T.sum(self.w1 * self.w1) + else: + return self.l2_coef * (T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2)) + + def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init): + init.setdefault('noise_level', 0) + init.setdefault('l2_coef', 0) + super(SigmoidXEDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, **init) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/kernel_regression.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/kernel_regression.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,231 @@ +""" +Implementation of kernel regression: +""" + +from pylearn.learner import OfflineLearningAlgorithm +from theano import tensor as T +from theano.tensor.nnet import prepend_1_to_each_row +from theano.scalar import as_scalar +from common.autoname import AutoName +import theano +import numpy + +# map a N-vector to a 1xN matrix +row_vector = theano.tensor.DimShuffle((False,),['x',0]) +# map a N-vector to a Nx1 matrix +col_vector = theano.tensor.DimShuffle((False,),[0,'x']) + +class KernelRegression(OfflineLearningAlgorithm): + """ +Implementation of kernel regression: +* the data are n (x_t,y_t) pairs and we want to estimate E[y|x] +* the predictor computes + f(x) = b + \sum_{t=1}^n \alpha_t K(x,x_t) + with free parameters b and alpha, training inputs x_t, + and kernel function K (gaussian by default). + Clearly, each prediction involves O(n) computations. +* the learner chooses b and alpha to minimize + lambda alpha' G' G alpha + \sum_{t=1}^n (f(x_t)-y_t)^2 + where G is the matrix with entries G_ij = K(x_i,x_j). + The first (L2 regularization) term is the squared L2 + norm of the primal weights w = \sum_t \alpha_t phi(x_t) + where phi is the function s.t. K(u,v)=phi(u).phi(v). +* this involves solving a linear system with (n+1,n+1) + matrix, which is an O(n^3) computation. In addition, + that linear system matrix requires O(n^2) memory. + So this learning algorithm should be used only for + small datasets. 
+* the linear system is + (M + lambda I_n) theta = (1, y)' + where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity + except with a 0 at (0,0), M is the matrix with G in the sub-matrix starting + at (1,1), 1's in column 0, except for a value of n at (0,0), and sum_i G_{i,j} + in the rest of row 0. + +Note that this is gives an estimate of E[y|x,training_set] that is the +same as obtained with a Gaussian process regression. The GP +regression would also provide a Bayesian Var[y|x,training_set]. +It corresponds to an assumption that f is a random variable +with Gaussian (process) prior distribution with covariance +function K. Because we assume Gaussian noise we obtain a Gaussian +posterior for f (whose mean is computed here). + + + Usage: + + kernel_regressor=KernelRegression(L2_regularizer=0.1,gamma=0.5) (kernel=GaussianKernel(gamma=0.5)) + kernel_predictor=kernel_regressor(training_set) + all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" field + outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays + outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets) + errors = kernel_predictor.compute_errors(inputs,targets) + mse = kernel_predictor.compute_mse(inputs,targets) + + + + The training_set must have fields "input" and "target". + The test_set must have field "input", and needs "target" if + we want to compute the squared errors. + + The predictor parameters are obtained analytically from the training set. + Training is only done on a whole training set rather than on minibatches + (no online implementation). + + The dataset fields expected and produced by the learning algorithm and the trained model + are the following: + + - Input and output dataset fields (example-wise quantities): + + - 'input' (always expected as an input_dataset field) + - 'target' (always expected by the learning algorithm, optional for learned model) + - 'output' (always produced by learned model) + - 'squared_error' (optionally produced by learned model if 'target' is provided) + = example-wise squared error + """ + def __init__(self, kernel=None, L2_regularizer=0, gamma=1, use_bias=False): + # THE VERSION WITH BIAS DOES NOT SEEM RIGHT + self.kernel = kernel + self.L2_regularizer=L2_regularizer + self.use_bias=use_bias + self.gamma = gamma # until we fix things, the kernel type is fixed, Gaussian + self.equations = KernelRegressionEquations() + + def __call__(self,trainset): + n_examples = len(trainset) + first_example = trainset[0] + n_inputs = first_example['input'].size + n_outputs = first_example['target'].size + b1=1 if self.use_bias else 0 + M = numpy.zeros((n_examples+b1,n_examples+b1)) + Y = numpy.zeros((n_examples+b1,n_outputs)) + for i in xrange(n_examples): + M[i+b1,i+b1]=self.L2_regularizer + data = trainset.fields() + train_inputs = numpy.array(data['input']) + if self.use_bias: + Y[0]=1 + Y[b1:,:] = numpy.array(data['target']) + train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma) + M[b1:,b1:] += G + if self.use_bias: + M[0,1:] = sumG + M[1:,0] = 1 + M[0,0] = M.shape[0] + self.M=M + self.Y=Y + theta=numpy.linalg.solve(M,Y) + return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square) + +class KernelPredictorEquations(AutoName): + train_inputs = T.matrix() # n_examples x n_inputs + train_inputs_square = T.vector() # n_examples + inputs = T.matrix() # minibatchsize x n_inputs + targets = T.matrix() # minibatchsize x 
n_outputs + theta = T.matrix() # (n_examples+1) x n_outputs + b1 = T.shape(train_inputs_square)[0] 1: + print 'estimated train cost', cost_j + #TODO: consult iter[0] for periodic saving to cwd (model, minimizer, and stopper) + + def check(): + validate = logreg.validate(valid.x, valid.y) + if verbose > 0: + print 'iter', iter[0], 'validate', validate + sys.stdout.flush() + iter[0] += 1 + return validate[0] + + def save(): + return copy.deepcopy(logreg) + + stopper = make_stopper(**state.subdict(prefix='stopper_')) + stopper.find_min(step, check, save) + + state.train_01, state.train_rcost, state.train_cost = logreg.validate(train.x, train.y) + state.valid_01, state.valid_rcost, state.valid_cost = logreg.validate(valid.x, valid.y) + state.test_01, state.test_rcost, state.test_cost = logreg.validate(test.x, test.y) + + state.n_train = len(train.y) + state.n_valid = len(valid.y) + state.n_test = len(test.y) + +class LogReg2(module.FancyModule): + def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False): + super(LogReg2, self).__init__() #boilerplate + + self.input = (input) if input is not None else T.matrix('input') + self.targ = (targ) if targ is not None else T.lcol() + + self.w = (w) if w is not None else (T.dmatrix()) + self.b = (b) if b is not None else (T.dvector()) + self.lr = (lr) if lr is not None else (T.dscalar()) + + self.params = [p for p in [self.w, self.b] if p.owner is None] + + output = nnet.sigmoid(T.dot(self.x, self.w) + self.b) + xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output) + sum_xent = T.sum(xent) + + self.output = output + self.xent = xent + self.sum_xent = sum_xent + self.cost = sum_xent + + #define the apply method + self.pred = (T.dot(self.input, self.w) + self.b) > 0.0 + self.apply = module.Method([self.input], self.pred) + + #if this module has any internal parameters, define an update function for them + if self.params: + gparams = T.grad(sum_xent, self.params) + self.update = module.Method([self.input, self.targ], sum_xent, + updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams))) + + +class classification: #this would go to a file called pylearn/algorithms/classification.py + + @staticmethod + def xent(p, q): + """cross-entropy (row-wise) + + :type p: M x N symbolic matrix (sparse or dense) + + :param p: each row is a true distribution over N things + + :type q: M x N symbolic matrix (sparse or dense) + + :param q: each row is an approximating distribution over N things + + :rtype: symbolic vector of length M + + :returns: the cross entropy between each row of p and the corresponding row of q. + + + Hint: To sum row-wise costs into a scalar value, use "xent(p, q).sum()" + """ + return (p * tensor.log(q)).sum(axis=1) + + @staticmethod + def errors(target, prediction): + """classification error (row-wise) + + :type p: M x N symbolic matrix (sparse or dense) + + :param p: each row is a true distribution over N things + + :type q: M x N symbolic matrix (sparse or dense) + + :param q: each row is an approximating distribution over N things + + :rtype: symbolic vector of length M + + :returns: a vector with 0 for every row pair that has a maximum in the same position, + and 1 for every other row pair. 
+ + + Hint: Count errors with "errors(prediction, target).sum()", and get the error-rate with + "errors(prediction, target).mean()" + """ + return tensor.neq( + tensor.argmax(prediction, axis=1), + tensor.argmax(target, axis=1)) + +class LogReg_New(module.FancyModule): + """A symbolic module for performing multi-class logistic regression.""" + + params = property( + lambda self: [p for p in [self.w, self.b] if p.owner is None], + doc="WRITEME" + ) + + def __init__(self, n_in=None, n_out=None, w=None, b=None): + super(LogRegNew, self).__init__() #boilerplate + + self.n_in = n_in + self.n_out = n_out + + self.w = w if w is not None else (T.dmatrix()) + self.b = b if b is not None else (T.dvector()) + + def _instance_initialize(self, obj): + obj.w = N.zeros((self.n_in, self.n_out)) + obj.b = N.zeros(self.n_out) + obj.__pp_hide__ = ['params'] + + + def l1(self): + return abs(self.w).sum() + + def l2(self): + return (self.w**2).sum() + + def activation(self, input): + return theano.dot(input, self.w) + self.b + + def softmax(self, input): + return nnet.softmax(self.activation(input)) + + def argmax(self, input): + return tensor.argmax(self.activation(input)) + + def xent(self, input, target): + return classification.xent(target, self.softmax(input)) + + def errors(self, input, target): + return classification.errors(target, self.softmax(input)) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/minimizer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/minimizer.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,36 @@ +"""Define the interface and factory for gradient-based minimizers. +""" +import theano + +class DummyMinimizer(theano.Module): + """ The idea of a minimizer is that it provides an `step` function that will + eventually converge toward (maybe realize?) the minimum of a cost function. + + The step_cost function takes a step and returns the cost associated with either + the current or previous parameter values (return whichever is easiest to compute, it's + meant for user feedback.) + + """ + def __init__(self, args, cost, parameters, gradients=None): + super(DummyMinimizer, self).__init__() + + def _instance_step(self, obj, *args): + """Move the parameters toward the minimum of a cost + + :param args: The arguments here should be values for the Variables that were in the + `args` argument to the constructor. + + :Return: None + """ + pass + + def _instance_step_cost(self, obj, *args): + """Move the parameters toward the minimum of a cost, and compute the cost + + :param args: The arguments here should be values for the Variables that were in the + `args` argument to the constructor. + + :Return: The current cost value. + """ + pass + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/rbm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/rbm.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,100 @@ +import sys, copy +import theano +from theano import tensor as T +from theano.tensor.deprecated import rmodule +from theano.tensor.nnet import sigmoid +from theano.compile import module +from theano import printing, pprint +from theano import compile + +import numpy as N + +from ..datasets import make_dataset +from .minimizer import make_minimizer +from .stopper import make_stopper + +class RBM(rmodule.RModule): + + # is it really necessary to pass ALL of these ? 
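In plain NumPy, the quantities that classification.xent, classification.errors and the LogReg_New methods build symbolically look as follows. These are hypothetical helpers for illustration; note that the symbolic xent above returns (p * log q).sum(axis=1) without a leading minus sign, while the conventional cross-entropy shown here is its negation.

import numpy as np

def softmax(a):
    e = np.exp(a - a.max(axis=1, keepdims=True))    # row-wise, numerically stabilized
    return e / e.sum(axis=1, keepdims=True)

def xent(p, q):
    # cross-entropy between each true distribution p[i] and prediction q[i]
    return -(p * np.log(q)).sum(axis=1)

def errors(target, prediction):
    # 0/1 loss per row: do the argmaxes disagree?
    return (prediction.argmax(axis=1) != target.argmax(axis=1)).astype(float)

# error_rate = errors(targets_onehot, softmax(np.dot(x, w) + b)).mean()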
- GD + def __init__(self, + nvis=None, nhid=None, + input=None, + w=None, hidb=None, visb=None, + seed=0, lr=0.1): + + super(RBM, self).__init__() + self.nhid, self.nvis = nhid, nvis + self.lr = lr + + # symbolic theano stuff + # what about multidimensional inputs/outputs ? do they have to be + # flattened or should we used tensors instead ? + self.w = w if w is not None else module.Member(T.dmatrix()) + self.visb = visb if visb is not None else module.Member(T.dvector()) + self.hidb = hidb if hidb is not None else module.Member(T.dvector()) + self.seed = seed; + + # 1-step Markov chain + vis = T.dmatrix() + hid = sigmoid(T.dot(vis, self.w) + self.hidb) + hid_sample = self.random.binomial(T.shape(hid), 1, hid) + neg_vis = sigmoid(T.dot(hid_sample, self.w.T) + self.visb) + neg_vis_sample = self.random.binomial(T.shape(neg_vis), 1, neg_vis) + neg_hid = sigmoid(T.dot(neg_vis_sample, self.w) + self.hidb) + + # function which execute 1-step Markov chain (with and without cd updates) + self.updownup = module.Method([vis], [hid, neg_vis_sample, neg_hid]) + + # function to perform manual cd update given 2 visible and 2 hidden values + vistemp = T.dmatrix() + hidtemp = T.dmatrix() + nvistemp = T.dmatrix() + nhidtemp = T.dmatrix() + self.cd_update = module.Method([vistemp, hidtemp, nvistemp, nhidtemp], + [], + updates = {self.w: self.w + self.lr * + (T.dot(vistemp.T, hidtemp) - + T.dot(nvistemp.T, nhidtemp)), + self.visb: self.visb + self.lr * + (T.sum(vistemp - nvistemp,axis=0)), + self.hidb: self.hidb + self.lr * + (T.sum(hidtemp - nhidtemp,axis=0))}); + + # TODO: add parameter for weigth initialization + def _instance_initialize(self, obj): + obj.w = N.random.standard_normal((self.nvis,self.nhid)) + obj.visb = N.zeros(self.nvis) + obj.hidb = N.zeros(self.nhid) + obj.seed(self.seed); + + def _instance_cd1(self, obj, input, k=1): + poshid, negvissample, neghid = obj.updownup(input) + for i in xrange(k-1): + ahid, negvissample, neghid = obj.updownup(negvissample) + # CD-k update + obj.cd_update(input, poshid, negvissample, neghid) + + +def train_rbm(state, channel=lambda *args, **kwargs:None): + dataset = make_dataset(**state.dataset) + train = dataset.train + + rbm_module = RBM( + nvis=train.x.shape[1], + nhid=state['nhid']) + rbm = rbm_module.make() + + batchsize = state.get('batchsize', 1) + verbose = state.get('verbose', 1) + iter = [0] + + while iter[0] != state['max_iters']: + for j in xrange(0,len(train.x)-batchsize+1,batchsize): + rbm.cd1(train.x[j:j+batchsize]) + if verbose > 1: + print 'estimated train cost...' 
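The three half-steps computed by updownup and the parameter changes applied by cd_update correspond to one step of contrastive divergence (CD-1); a NumPy sketch (cd1_update is a hypothetical name, not part of the module) is:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def cd1_update(v0, W, visb, hidb, lr, rng):
    h0 = sigmoid(np.dot(v0, W) + hidb)             # up
    h0_sample = rng.binomial(1, h0)
    v1 = sigmoid(np.dot(h0_sample, W.T) + visb)    # down
    v1_sample = rng.binomial(1, v1)
    h1 = sigmoid(np.dot(v1_sample, W) + hidb)      # up again
    # positive phase uses the data, negative phase uses the reconstruction sample
    W = W + lr * (np.dot(v0.T, h0) - np.dot(v1_sample.T, h1))
    visb = visb + lr * (v0 - v1_sample).sum(axis=0)
    hidb = hidb + lr * (h0 - h1).sum(axis=0)
    return W, visb, hidb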
+ if iter[0] == state['max_iters']: + break + else: + iter[0] += 1 + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/regressor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/regressor.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,104 @@ + +import theano +from theano import tensor as T +from theano.tensor import nnet as NN +import numpy as N + +class Regressor(theano.FancyModule): + + def __init__(self, input = None, target = None, regularize = True): + super(Regressor, self).__init__() + + # MODEL CONFIGURATION + self.regularize = regularize + + # ACQUIRE/MAKE INPUT AND TARGET + self.input = theano.External(input) if input else T.matrix('input') + self.target = theano.External(target) if target else T.matrix('target') + + # HYPER-PARAMETERS + self.lr = theano.Member(T.scalar()) + + # PARAMETERS + self.w = theano.Member(T.matrix()) + self.b = theano.Member(T.vector()) + + # OUTPUT + self.output_activation = T.dot(self.input, self.w) + self.b + self.output = self.build_output() + + # REGRESSION COST + self.regression_cost = self.build_regression_cost() + + # REGULARIZATION COST + self.regularization = self.build_regularization() + + # TOTAL COST + self.cost = self.regression_cost + if self.regularize: + self.cost = self.cost + self.regularization + + # GRADIENTS AND UPDATES + self.params = self.w, self.b + gradients = T.grad(self.cost, self.params) + updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients)) + + # INTERFACE METHODS + self.update = theano.Method([self.input, self.target], self.cost, updates) + self.get_cost = theano.Method([self.input, self.target], self.cost) + self.predict = theano.Method(self.input, self.output) + + self.build_extensions() + + def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init): + if seed is not None: + R = N.random.RandomState(seed) + else: + R = N.random + if (input_size is None) ^ (output_size is None): + raise ValueError("Must specify input_size and output_size or neither.") + super(Regressor, self)._instance_initialize(obj, **init) + if input_size is not None: + sz = (input_size, output_size) + range = 1/N.sqrt(input_size) + obj.w = R.uniform(size = sz, low = -range, high = range) + obj.b = N.zeros(output_size) + obj.__hide__ = ['params'] + + def _instance_flops_approx(self, obj): + return obj.w.size + + def build_extensions(self): + pass + + def build_output(self): + raise NotImplementedError('override in subclass') + + def build_regression_cost(self): + raise NotImplementedError('override in subclass') + + def build_regularization(self): + return T.zero() # no regularization! 
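Typical use of the Regressor family, mirroring pylearn/algorithms/tests/test_regressor.py further down in this changeset (BinRegressor is the concrete subclass defined next):

import numpy
import pylearn.algorithms as models

reg = models.BinRegressor(regularize=False)
model = reg.make(lr=0.01, input_size=100, seed=10)

R = numpy.random.RandomState(100)
data = R.random_integers(0, 1, size=(10, 100))
targets = data[:, 6].reshape((10, 1))   # learn to copy input feature 6
cost = model.update(data, targets)      # one SGD step, returns the cost
preds = model.classify(data)            # iround(sigmoid(x W + b))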
+ + +class BinRegressor(Regressor): + + def build_extensions(self): + self.classes = T.iround(self.output) + self.classify = theano.Method(self.input, self.classes) + + def build_output(self): + return NN.sigmoid(self.output_activation) + + def build_regression_cost(self): + self.regression_cost_matrix = self.target * T.log(self.output) + (1.0 - self.target) * T.log(1.0 - self.output) + self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1) + return T.mean(self.regression_costs) + + def build_regularization(self): + self.l2_coef = theano.Member(T.scalar()) + return self.l2_coef * T.sum(self.w * self.w) + + def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init): + init.setdefault('l2_coef', 0) + super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/rnn.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/rnn.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,247 @@ +#!/usr/bin/env python +import numpy as N +from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile +from theano.gof import OpSub, TopoOptimizer + +from minimizer import make_minimizer # minimizer +from theano.printing import Print +import sgd #until Olivier's module-import thing works better + +#################### +# Library-type stuff +#################### + +class TanhRnn(Op): + """ + This class implements the recurrent part of a recurrent neural network. + + There is not a neat way to include this in a more fine-grained way in Theano at the moment, + so to get something working, I'm implementing a relatively complicated Op that could be + broken down later into constituents. + + Anyway, this Op implements recursive computation of the form: + + .. latex-eqn: + z_t &= \tanh( z_{t-1} A + x_{t-1}) + + For z0 a vector, and x a TxM matrix, it returns a matrix z of shape (T+1, M), + in which z[0] = z0. 
+ + """ + + def make_node(self, x, z0, A): + """ + :type x: matrix (each row is an x_t) (shape: (T, M)) + :type z0: vector (the first row of output) (shape: M) + :type A: matrix (M by M) + + """ + x = T.as_tensor(x) + z0 = T.as_tensor(z0) + A = T.as_tensor(A) + z = x.type() #make a new symbolic result with the same type as x + return Apply(self, [x, z0, A], [z]) + + def perform(self, node, (x,z0,A), out): + T,M = x.shape + z = N.zeros((T+1, M)) + z[0] = z0 + for i in xrange(T): + z[i+1] = N.tanh(N.dot(z[i], A) + x[i]) + out[0][0] = z + + def grad(self, (x, z0, A), (gz,)): + z = tanh_rnn(x, z0, A) + gz_incl_rnn, gx = tanh_rnn_grad(A, z, gz) + return [gx, gz_incl_rnn[0], (T.dot(z[:-1].T, gx))] +tanh_rnn = TanhRnn() + +class TanhRnnGrad(Op): + """Gradient calculation for TanhRnn""" + + def __init__(self, inplace): + self.inplace = inplace + + if self.inplace: + self.destroy_map = {0: [2]} + + def __eq__(self, other): + return (type(self) == type(other)) and (self.inplace == other.inplace) + + def __hash__(self, other): + return hash(type(self)) ^ hash(self.inplace) + + def make_node(self, A, z, gz): + return Apply(self, [A,z,gz], (z.type(), gz.type())) + + def perform(self, node, (A, z, gz), out): + Tp1,M = z.shape + T = Tp1 - 1 + gx = N.zeros((T, M)) + + if not self.inplace: + gz = gz.copy() + + for i in xrange(T-1, -1, -1): + #back through the tanh + gx[i] = gz[i+1] * (1.0 - z[i+1] * z[i+1]) + gz[i] += N.dot(A, gx[i]) + + out[0][0] = gz + out[1][0] = gx + + def __str__(self): + if self.inplace: + return 'Inplace' + super(TanhRnnGrad, self).__str__() + else: + return super(TanhRnnGrad, self).__str__() + +tanh_rnn_grad = TanhRnnGrad(inplace=False) +tanh_rnn_grad_inplace = TanhRnnGrad(inplace=True) + +compile.optdb.register('inplace_rnngrad', TopoOptimizer(OpSub(tanh_rnn_grad, tanh_rnn_grad_inplace)), 60, 'fast_run', 'inplace') + + +####################### +# Experiment-type stuff +####################### + + + +class ExampleRNN(Module): + + def __init__(self, n_vis, n_hid, n_out, minimizer): + super(ExampleRNN, self).__init__() + + def affine(weight, bias): + return (lambda a : T.dot(a, weight) + bias) + + self.n_vis = n_vis + self.n_hid = n_hid + self.n_out = n_out + + #affine transformatoin x -> latent space + self.v, self.b = Member(T.dmatrix()), Member(T.dvector()) + input_transform = affine(self.v, self.b) + + #recurrent weight matrix in latent space + self.z0 = Member(T.dvector()) + self.w = Member(T.dmatrix()) + + #affine transformation latent -> output space + self.u, self.c = Member(T.dmatrix()), Member(T.dvector()) + output_transform = affine(self.u, self.c) + + self.params = [self.v, self.b, self.w, self.u, self.c] + + #input and target + x, y = T.dmatrix(), T.dmatrix() + + z = tanh_rnn(input_transform(x), self.z0, self.w) + yhat = output_transform(z[1:]) + self.cost = T.sum((y - yhat)**2) + + self.blah = Method([x,y], self.cost) + + # using the make_minimizer protocol + self.minimizer = minimizer([x, y], self.cost, self.params) + + def _instance_initialize(self, obj): + n_vis = self.n_vis + n_hid = self.n_hid + n_out = self.n_out + + rng = N.random.RandomState(2342) + + obj.z0 = N.zeros(n_hid) + obj.v = rng.randn(n_vis, n_hid) * 0.01 + obj.b = N.zeros(n_hid) + obj.w = rng.randn(n_hid, n_hid) * 0.01 + obj.u = rng.randn(n_hid, n_out) * 0.01 + obj.c = N.zeros(n_out) + obj.minimizer.initialize() + def _instance__eq__(self, other): + if not isinstance(other.component, ExampleRNN): + raise NotImplemented + #we compare the member. 
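For readers who prefer to see the recursion outside the Op machinery, TanhRnn.perform and TanhRnnGrad.perform translate to the following NumPy (function names hypothetical):

import numpy as np

def tanh_rnn_forward(x, z0, A):
    # z[0] = z0, z[t+1] = tanh(z[t] A + x[t])
    T, M = x.shape
    z = np.zeros((T + 1, M))
    z[0] = z0
    for t in range(T):
        z[t + 1] = np.tanh(np.dot(z[t], A) + x[t])
    return z

def tanh_rnn_backward(A, z, gz):
    # backprop through the tanh and the recurrence, newest time step first
    T = z.shape[0] - 1
    gx = np.zeros((T, z.shape[1]))
    gz = gz.copy()
    for t in range(T - 1, -1, -1):
        gx[t] = gz[t + 1] * (1.0 - z[t + 1] ** 2)
        gz[t] += np.dot(A, gx[t])
    return gz, gx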
+# if self.n_vis != other.n_vis or slef.n_hid != other.n_hid or self.n_out != other.n_out: +# return False + if (N.abs(self.z0-other.z0)<1e-8).all() and (N.abs(self.v-other.v)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.u-other.u)<1e-8).all() and (N.abs(self.c-other.c)<1e-8).all() and (N.abs(self.z0-other.z0)<1e-8).all(): + return True + return False + + def _instance__hash__(self): + raise NotImplemented + +def test_example_rnn(): + minimizer_fn = make_minimizer('sgd', stepsize = 0.001) + + n_vis = 5 + n_out = 3 + n_hid = 4 + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + + rnn = rnn_module.make(mode='FAST_RUN') + + rng = N.random.RandomState(7722342) + x = rng.randn(10,n_vis) + y = rng.randn(10,n_out) + + #set y to be like x with a lag of LAG + LAG = 4 + y[LAG:] = x[:-LAG, 0:n_out] + + if 1: + for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()): + print i, node + + niter=1500 + for i in xrange(niter): + if i % 100 == 0: + print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize + else: + rnn.minimizer.step_cost(x, y) + +def test_WEIRD_STUFF(): + n_vis = 5 + n_out = 3 + n_hid = 4 + rng = N.random.RandomState(7722342) + x = rng.randn(10,n_vis) + y = rng.randn(10,n_out) + + #set y to be like x with a lag of LAG + LAG = 4 + y[LAG:] = x[:-LAG, 0:n_out] + + minimizer_fn1 = make_minimizer('sgd', stepsize = 0.001) + minimizer_fn2 = make_minimizer('sgd', stepsize = 0.001) + rnn_module1 = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn1) + rnn_module2 = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn2) + rnn1 = rnn_module2.make(mode='FAST_RUN') + rnn2 = rnn_module1.make(mode='FAST_COMPILE') + if 0: + topo1=rnn1.minimizer.step_cost.maker.env.toposort() + topo2=rnn2.minimizer.step_cost.maker.env.toposort() + for i in range(len(topo1)): + print '1',i, topo1[i] + print '2',i, topo2[i] + + + + niter=50 + for i in xrange(niter): + rnn1.minimizer.step(x, y) + rnn2.minimizer.step(x, y) + + # assert rnn1.n_vis != rnn2.n_vis or slef.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out + assert (N.abs(rnn1.z0-rnn2.z0)<1e-8).all() + assert (N.abs(rnn1.v-rnn2.v)<1e-8).all() and (N.abs(rnn1.b-rnn2.b)<1e-8).all() and (N.abs(rnn1.w-rnn2.w)<1e-8).all() and (N.abs(rnn1.u-rnn2.u)<1e-8).all() and (N.abs(rnn1.c-rnn2.c)<1e-8).all() + + # assert b + +if __name__ == '__main__': +# from theano.tests import main +# main(__file__) + test_example_rnn() + test_WEIRD_STUFF() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/_test_onehotop.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/_test_onehotop.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,21 @@ +from onehotop import one_hot + +import unittest +from theano import compile +from theano import gradient +from theano import function +from theano.tensor import as_tensor + +import random +import numpy.random + +class T_OneHot(unittest.TestCase): + def test0(self): + x = as_tensor([3, 2, 1]) + y = as_tensor(5) + o = one_hot(x, y) + f = function([],o) + self.failUnless(numpy.all(f() == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]]))) + +if __name__ == '__main__': + unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/cost.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/cost.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,154 @@ +""" +Cost functions. 
+ +@note: All of these functions return one cost per example. So it is your +job to perform a tensor.sum over the individual example losses. +""" + +import theano as T +from theano import tensor, scalar +import numpy + +class UndefinedGradient(Exception): + """ + Raised by UndefinedGradientOp to indicate that the gradient is undefined mathematically. + """ + pass +from theano import gof +class UndefinedGradientOp(gof.Op): + def perform(self, x=None): + if x is not None: raise UndefinedGradient(x) + else: raise UndefinedGradient(x) +undefined_gradient = UndefinedGradientOp() + +class LogFactorial(scalar.UnaryScalarOp): + """ + Compute log x!. + @todo: Rewrite so that it uses INTs not FLOATs. + @todo: Move this to Theano. + @todo: This function is slow, probably want to cache the values. + """ + @staticmethod + def st_impl(x): + if not isinstance(x, int) and not isinstance(x, long): + raise TypeError('type(x) = %s, must be int or long' % type(x)) + if x == 0.0: + return 0.0 + v = 0.0 + for i in range(x): + v += numpy.log(x) + return v + def impl(self, x): + return LogFactorial.st_impl(x) + def grad(self, (x,), (gz,)): + undefined_gradient(self) +# def grad(self, (x,), (gz,)): +# raise NotImplementedError('gradient not defined over discrete values') +# return None +# return [gz * (1 + scalar.log(x))] +# def c_code(self, node, name, (x,), (z,), sub): +# if node.inputs[0].type in [scalar.float32, scalar.float64]: +# return """%(z)s = +# %(x)s == 0.0 +# ? 0.0 +# : %(x)s * log(%(x)s);""" % locals() +# raise NotImplementedError('only floatingpoint is implemented') +scalar_logfactorial = LogFactorial(scalar.upgrade_to_float, name='scalar_logfactoral') +logfactorial = tensor.Elemwise(scalar_logfactorial, name='logfactorial') + + +def poissonlambda(unscaled_output, doclen, beta_scale): + """ + A continuous parameter lambda_i which is the expected number of + occurence of word i in the document. Note how this must be positive, + and that is why Ranzato and Szummer (2008) use an exponential. + + Yoshua: I don't like exponentials to guarantee positivity. softplus + is numerically much better behaved (but you might want to try both + to see if it makes a difference). + + @todo: Maybe there are more sensible ways to set the beta_scale. + """ + beta = beta_scale * doclen + return beta * tensor.exp(unscaled_output) + +def nlpoisson(target, output, beta_scale=1, axis=0, sumloss=True, zerothreshold=0): + """ + The negative log Poisson regression probability. + From Ranzato and Szummer (2008). + + Output should be of the form Weight*code+bias, i.e. unsquashed. + NB this is different than the formulation in Salakhutdinov and Hinton + (2007), in which the output is softmax'ed and multiplied by the input + document length. That is also what Welling et. al (2005) do. It would + be useful to try the softmax, because it is more well-behaved. + + There is a beta term that is proportional to document length. We + are not sure what beta scale is used by the authors. We use 1 as + the default, but this value might be inappropriate. + For numerical reasons, Yoshua recommends choosing beta such that + the lambda is expected to be around 1 for words that have a non-zero count. + So he would take: + + beta = document_size / unique_words_per_document + + I am not sure the above math is correct, I need to talk to him. + + Yoshua notes that ``there is a x_i log(beta) term missing, if you + compare with eqn 2 (i.e., take the log). 
They did not include in + 3 because it does not depend on the parameters, so the gradient + wrt it would be 0. But if you really want log-likelihood it should + be included.'' If you want a true log-likelihood, you probably should + actually compute the derivative of the entire eqn 2. + + Axis is the axis along which we sum the target values, to obtain + the document length. + + If sumloss, we sum the loss along axis. + + If zerothreshold is non-zero, we threshold the loss: + If this target dimension is zero and beta * tensor.exp(output) + < zerothreshold, let this loss be zero. + + @todo: Include logfactorial term + """ +# from theano.printing import Print +# print dtype(target) # make sure dtype is int32 or int64 +# print target.dtype + doclen = tensor.sum(target, axis=axis) + lambdav = poissonlambda(output, doclen, beta_scale) + lossterms = lambdav - target*output + if sumloss: + return tensor.sum(lossterms, axis=axis) + else: + return lossterms +# return tensor.sum(beta * tensor.exp(output) - target*output + logfactorial(target), axis=axis) + + +#import numpy +#def nlpoisson_nontheano(target, output, beta_scale=1, axis=0): +# doclen = numpy.sum(target, axis=axis) +# print "doclen", doclen +# beta = beta_scale * doclen +# print "beta", beta +# print "exp", numpy.exp(output) +# print "beta * exp", beta * numpy.exp(output) +# print "x * y", target * output +# +# import theano.tensor as TT +# x = TT.as_tensor(target) +# o = logfactorial(x) +# f = T.function([],o) +# logf = f() +# print "log factorial(x)", logf +# print "beta * exp - dot + log factorial", beta * numpy.exp(output) - target*output + f() +# print "total loss", numpy.sum(beta * numpy.exp(output) - target*output + f(), axis=axis) +# +## return beta * numpy.exp(output) - numpy.dot(target, output) +## #+ logfactorial(target) +# +#import numpy +#target = numpy.array([0, 0, 1, 1, 2, 2, 100, 100]) +##output = numpy.array([0., 0.5, 1., 0.5, 2., 0.5, 100., 0.5]) +#output = numpy.array([0., 1, 1., 0, 1, 0, 5, 1]) +#nlpoisson_nontheano(target, output) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/kalman.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/kalman.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,57 @@ + +""" +Modules and misc. code related to the Kalman Filter. + + +Kalman filter algorithm as presented in "Probabilistic Robotics" + +x_t is the state + +u_t is a control vector + +z_t is the observation vector + +\epsilon_t is a random noise term with zero mean and covariance R_t. + +\delta_t is a random noise term with zero mean and covariance Q_t. + +state (x_t) evolves according to + + x_t = A_t x_{t-1} + B_t u_t + \epsilon_t + +Observation z_t is made according to + + z_t = C_t x_t + \delta_t + +Assume that the distribution over initial states is a Gaussian. + +With these linear/Gaussian assumptions, the belief about the state all times t is Gaussian, so +we can represent it compactly by the mean (mu) and the covariance (sigma). + +""" + +class KalmanModule(Module): + """ + """ + def __init__(self): + + self.mu = Member() + self.sigma = Member() + + u, z = vector(), vector() + + # the formulas here work for A, B, R, C matrix or sparse matrix. + # ... anything that supports dot, +, -, dotinv, and transpose. + + A, B, C= matrix(), matrix(), matrix() + R, Q = matrix(), matrix() + + #algo from Probabilistic Robotics pg. 
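The commented-out prototype at the bottom of this file computes the same loss without Theano; a compact version, which like nlpoisson ignores the log-factorial term, is:

import numpy as np

def nlpoisson_numpy(target, output, beta_scale=1, axis=0):
    doclen = target.sum(axis=axis)
    lam = beta_scale * doclen * np.exp(output)   # same role as poissonlambda above
    return (lam - target * output).sum(axis=axis)

target = np.array([0, 0, 1, 1, 2, 2, 100, 100], dtype=float)
output = np.array([0., 1, 1., 0, 1, 0, 5, 1])
print(nlpoisson_numpy(target, output))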
42 + mu_bar = dot(A, self.mu) + dot(B, u) + sigma_bar = dot(A, self.sigma, A.T) + R + K = dot(sigma_bar, C.T, dotinv(dot(C, sigma_bar, C.T) + Q)) + mu_t = mu_bar + dot(K, z - dot(C,mu_bar)) + sigma_t = dot(ident - dot(K,C), sigma_bar) + + self.update = Method([u, z, A, B, C, R, Q], [], updates = {self.mu:mu_t, self.sigma:sigma_t}) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/onehotop.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/onehotop.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,58 @@ +""" +One hot Op +""" + +#from theano import tensor +from theano.tensor import as_tensor, Tensor +from theano.gof import op +from theano.gof.graph import Apply + +import numpy + +class OneHot(op.Op): + """ + Construct a one-hot vector, x out of y. + + @todo: Document inputs and outputs + @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! + @todo: Use 'bool' as output dtype, not 'int64' ? + @todo: Allow this to operate on column vectors (Tensor) + @todo: Describe better. + """ + + def make_node(self, x, y): + """ + @type x: Vector L{Tensor} of integers + @param x: The entries of the one-hot vector to be one. + @type y: Integer scalar L{Tensor} + @param y: The length (#columns) of the one-hot vectors. + @return: A L{Tensor} of one-hot vectors + + @precondition: x < y for all entries of x + @todo: Check that x and y are int types + """ + x = as_tensor(x) + y = as_tensor(y) + #assert x.dtype[0:3] == "int" + #assert y.dtype[0:3] == "int" + inputs = [x, y] + ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] + #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] + #outputs = [Tensor("int64", broadcastable=[False, False])] + outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] + node = Apply(op = self, inputs = inputs, outputs = outputs) + return node + + def perform(self, node, (x, y), (out, )): + assert x.dtype == "int64" or x.dtype == "int32" + assert x.ndim == 1 + assert y.dtype == "int64" or x.dtype == "int32" + assert y.ndim == 0 + out[0] = numpy.zeros((x.shape[0], y), dtype="float64") + for c in range(x.shape[0]): + assert x[c] < y + out[0][c, x[c]] = 1 + + def grad(self, (x, y), (out_gradient, )): + return None, None +one_hot = OneHot() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/stat_ops.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/stat_ops.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,92 @@ + +import theano +from theano import gof +from theano import tensor +import numpy + + +class ExampleWiseMean(gof.Op): + + def __init__(self): + self.destroy_map = {0: [1, 2]} + + def make_node(self, x): + return gof.Apply(self, + [x, tensor.value(float('nan')), tensor.value(0)], + [tensor.Tensor(dtype = 'float64', + broadcastable = x.type.broadcastable)()]) + + def perform(self, node, (x, sum, n), (out,)): + if numpy.isnan(sum).any(): + sum.resize(x.shape, refcheck=0) + sum[:] = x + else: + sum += x + n += 1 + out[0] = sum / n + + def c_code(self, name, node, (x, sum, n), (out, ), sub): + return """ + PyObject* multi; + int nelems; + if (isnan(((double*)(%(sum)s->data))[0])) { + PyArray_Dims dims; + dims.len = %(x)s->nd; + dims.ptr = %(x)s->dimensions; + PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER); + multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // Copy %(x)s in %(sum)s + *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1); + 
PyArray_MultiIter_NEXT(multi); + } + } + else { + // Add some error checking on the size of x + multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // Add %(x)s to %(sum)s + *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1); + PyArray_MultiIter_NEXT(multi); + } + } + ((npy_int64*)(%(n)s->data))[0]++; + int n = ((npy_int64*)(%(n)s->data))[0]; + if (%(out)s == NULL) { + %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0); + } + multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // %(out)s <- %(sum)s / %(n)s + *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n; + PyArray_MultiIter_NEXT(multi); + } + """ % dict(locals(), **sub) + + + +if __name__ == '__main__': + + vectors = numpy.random.RandomState(666).rand(10, 2) + + x = tensor.dvector() + e = ExampleWiseMean()(x) + + # f = theano.function([x], [e], linker = 'py') + + # for i, v in enumerate(vectors): + # print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) + + # print + + f = theano.function([x], [e], linker = 'c|py') + + for i, v in enumerate(vectors): + print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/sandbox/test_cost.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/test_cost.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,53 @@ +import pylearn.algorithms.sandbox.cost as cost + +import unittest +import theano as T +import theano.tensor as TT +import numpy + +class T_logfactorial(unittest.TestCase): + def test(self): + x = TT.as_tensor(range(10)) + o = cost.logfactorial(x) + f = T.function([],o) + self.failUnless(numpy.all(f() - numpy.asarray([0., 0., 1.38629436, 3.29583687, 5.54517744, 8.04718956, 10.75055682, 13.62137104, 16.63553233, 19.7750212])) < 1e-5) + + def test_float(self): + """ + This should fail because we can't use floats in logfactorial + """ + x = TT.as_tensor([0.5, 2.7]) + o = cost.logfactorial(x) + f = T.function([],o) +# print repr(f()) + self.failUnless(numpy.all(f() == numpy.asarray([0., 0., 1.38629436, 3.29583687, 5.54517744, 8.04718956, 10.75055682, 13.62137104, 16.63553233, 19.7750212]))) + +class T_nlpoisson(unittest.TestCase): + def test(self): + target = TT.as_tensor([0, 0, 1, 1, 2, 2, 100, 100]) + output = TT.as_tensor([0., 1, 1., 0, 1, 0, 5, 1]) + o = cost.nlpoisson(target, output) + f = T.function([],o) + self.failUnless(f() - 33751.7816277 < 1e-5) + + def test_gradient(self): + target = TT.as_tensor([0, 0, 1, 1, 2, 2, 100, 100]) + output = TT.as_tensor([0., 1, 1., 0, 1, 0, 5, 1]) + loss = cost.nlpoisson(target, output) + (goutput) = TT.grad(loss, [output]) +# (goutput) = TT.grad(loss, [target]) + f = T.function([], goutput) + print f() + self.failUnless(numpy.all(f() - numpy.asarray([206., 559.96605666, 558.96605666, 205., 557.96605666, 204., 30473.11077513, 459.96605666] < 1e-5))) + + def test_gradient_fail(self): + target = TT.as_tensor([0, 0, 1, 1, 2, 2, 100, 100]) + output = TT.as_tensor([0., 1, 1., 0, 1, 0, 5, 1]) + loss = cost.nlpoisson(target, output) + (goutput) = TT.grad(loss, [target]) + f = T.function([], goutput) + print f() + self.failUnless(numpy.all(f() - numpy.asarray([206., 559.96605666, 558.96605666, 205., 557.96605666, 204., 30473.11077513, 459.96605666] < 1e-5))) + +if __name__ == '__main__': + unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c 
pylearn/algorithms/sgd.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sgd.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,48 @@ +"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.) +""" + +import theano + +class StochasticGradientDescent(theano.Module): + """Fixed stepsize gradient descent""" + def __init__(self, args, cost, params, gradients=None, stepsize=None): + """ + :param stepsize: the step to take in (negative) gradient direction + :type stepsize: None, scalar value, or scalar TensorVariable + """ + super(StochasticGradientDescent, self).__init__() + self.stepsize_init = None + + if stepsize is None: + self.stepsize = theano.tensor.dscalar() + elif isinstance(stepsize, theano.tensor.TensorVariable): + self.stepsize = stepsize + else: + self.stepsize = (theano.tensor.as_tensor_variable(stepsize)) + + if self.stepsize.ndim != 0: + raise TypeError('stepsize must be a scalar', stepsize) + + self.params = params + self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients + + self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams)) + + self.step = theano.Method( + args, [], + updates=self.updates) + self.step_cost = theano.Method( + args, cost, + updates=self.updates) + + def _instance_initialize(self, obj): + pass + +def sgd_minimizer(stepsize=None): + """Curry the stepsize argument to StochasticGradientDescent, providing standard minimizer interface + + :returns: standard minimizer constructor f(args, cost, params, gradient=None) + """ + def f(args, cost, params, gradient=None): + return StochasticGradientDescent(args, cost, params, gradient, stepsize) + return f diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/stacker.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/stacker.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,102 @@ + +# for example in examples: +# repr = example +# for layer in stacked.layers: +# layer.update(repr) +# repr = layer.representation(repr) + +import theano +from theano import tensor as T +from theano.tensor.deprecated import rmodule +import sys +import numpy as N + +class Stacker(rmodule.RModule): + """ + @note: Assumes some names in the layers: input, cost, lr, and update + @todo: Maybe compile functions on demand, rather than immediately. 
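Usage of the StochasticGradientDescent module added above mirrors pylearn/algorithms/tests/test_sgd.py later in this changeset: minimize (1 - x*y)^2 over y for a fixed input x, with a constant stepsize.

import theano
from pylearn.algorithms import sgd

x = theano.tensor.dscalar('x')
y = theano.tensor.dscalar('y')

M = sgd.StochasticGradientDescent([x], (1.0 - x * y) ** 2, [y], stepsize=0.01)
M.y = y                      # expose y so the instance can be initialized
m = M.make()
m.y = 5.0
for i in range(100):
    cost = m.step_cost(3.0)  # one update of y, returning the current cost
# y converges towards 1/3; the test asserts |y - 1/3| < 1e-4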
+ """ + + def __init__(self, submodules, input = None, regularize = False): + super(Stacker, self).__init__() + + current = input + layers = [] + for i, (submodule, outname) in enumerate(submodules): + layer = submodule(current, regularize = regularize) + layers.append(layer) + current = layer[outname] + self.layers = layers + + self.input = self.layers[0].input + self.output = current + + representation = [] + local_update = [] + global_update = [] + to_update = [] + all_kits = [] + for layer, (submodule, outname) in zip(layers, submodules): + u = layer.update + u.resolve_all() + to_update += u.updates.keys() + all_kits += u.kits + # the input is the whole deep model's input instead of the layer's own + # input (which is previous_layer[outname]) + inputs = [self.input] + u.inputs[1:] + method = theano.Method(inputs, u.outputs, u.updates, u.kits) + local_update.append(method) + global_update.append( + theano.Method(inputs, + u.outputs, + # we update the params of the previous layers too but wrt + # this layer's cost + dict((param, param - layer.lr * T.grad(layer.cost, param)) + for param in to_update), + list(all_kits))) + representation.append(theano.Method(self.input, layer[outname])) + +# @todo: Add diagnostics +# self.diagnose_from_input = Method([self.input], self.layers[0].diagnose.outputs + self.layers[1].diagnose.outputs ... + + self.local_update = local_update + self.global_update = global_update + self.representation = representation + self.update = self.global_update[-1] + self.compute = theano.Method(self.input, self.output) + ll = self.layers[-1] + for name, method in ll.components_map(): + if isinstance(method, theano.Method) and not hasattr(self, name): + m = method.dup() + m.resolve_all() + m.inputs = [self.input if x is ll.input else x for x in m.inputs] + setattr(self, name, m) + + def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs): + super(Stacker, self)._instance_initialize(obj, **kwargs) + if seed is not None: + R = N.random.RandomState(seed) + else: + R = N.random + for layer in obj.layers: + if layer.lr is None: + layer.lr = lr + if nunits: + obj.input_dimension = nunits[0] + obj.output_dimension = nunits[-1] + if len(nunits) != len(obj.layers) + 1: + raise ValueError('You should give exactly one more unit numbers as there are layers.') + for ni, no, layer in zip(nunits[:-1], nunits[1:], obj.layers): + if seed is not None: + layer.initialize(ni, no, seed = R.random_integers(sys.maxint - 1)) + else: + layer.initialize(ni, no) + if seed is not None: + obj.seed(seed) + + def _instance_flops_approx(self, obj): + rval = 0 + for layer in obj.layers: + rval += layer.flops_approx() + return rval + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/stopper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/stopper.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,125 @@ +"""Early stopping iterators + +The idea here is to supply early-stopping heuristics that can be used in the +form: + + stopper = SomeEarlyStopper() + + for i in stopper(): + # train from data + if i.set_score: + i.score = validation_score + + +So far I only have one heuristic, so maybe this won't scale. 
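The training protocol intended for the Stacker above (greedy unsupervised passes through local_update, then supervised updates through the whole stack) is exercised by pylearn/algorithms/tests/test_daa.py later in this changeset; in outline:

import pylearn.algorithms as models

ndaa = 3
daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa
                     + [(models.BinRegressor, 'output')],
                     regularize=False)
model = daa.make([4, 20, 20, 20, 1], lr=0.01, seed=10)
for l in range(ndaa):
    model.layers[l].noise_level = 0.3

inputs = ([[0, 1, 0, 1]], [[1, 0, 1, 0]])
targets = ([[1]], [[0]])
for l in range(ndaa):                  # unsupervised, one layer at a time
    for x in inputs:
        model.local_update[l](x)
for x, y in zip(inputs, targets):      # supervised pass through the whole stack
    model.update(x, y)
print(model.classify([[0, 1, 0, 1]]))

(The test repeats each of these passes ten times before checking the predictions.)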
+""" + +class Stopper(object): + + def train(self, data, update_rows_fn, update, validate, save=None): + """Return the best model trained on data + + Parameters: + data - a thing that accepts getitem(), or a tuple of such things + update_rows_fn - fn : int --> + update - fn: update an internal model from elements of data + validate - fn: evaluate an internal model based on elements of data + save - fn: return a copy of the internal model + + The body of this function exhausts the iterator, and trains a + model using early stopping in the process. + """ + + best = None + for stp in self: + i = stp.iter + + # call update on some training set rows + t_rows = update_rows_fn(i) + if isinstance(data, (tuple, list)): + update(*[d[t_rows] for d in data]) + else: + update(data[t_rows]) + + if stp.set_score: + stp.score = validate() + if (stp.score < stp.best_score) and save: + best = save() + return best + + def find_min(self, step, check, save): + best = None + for stp in self: + step() + if stp.set_score: + stp.score = check() + if (stp.score < stp.best_score) and save: + best = (save(), stp.iter, stp.score) + return best + +class ICML08Stopper(Stopper): + @staticmethod + def icml08(ntrain, batchsize): + """Some setting similar to what I used for ICML08 submission""" + #TODO: what did I actually use? put that in here. + return ICML08Stopper(30*ntrain/batchsize, + ntrain/batchsize, 0.96, 2.0, 100000000) + + def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit): + self.initial_wait = i_wait + self.set_score_interval = v_int + self.min_improvement = min_improvement + self.patience = patience + self.hard_limit = hard_limit + + self.best_score = float('inf') + self.best_iter = -1 + self.iter = -1 + + self.set_score = False + self.score = None + + def __iter__(self): + return self + + E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score' + def next(self): + + #print 'ICML08 stopper, were doing a next' + + if self.set_score: #left over from last time + if self.score is None: + raise Exception(ICML08Stopper.E_set_score) + if self.score < (self.best_score * self.min_improvement): + (self.best_score, self.best_iter) = (self.score, self.iter) + self.score = None #un-set it + + + starting = self.iter < self.initial_wait + waiting = self.iter < (self.patience * self.best_iter) + if starting or waiting: + # continue to iterate + self.iter += 1 + if self.iter == self.hard_limit: + raise StopIteration + self.set_score = (self.iter % self.set_score_interval == 0) + return self + + raise StopIteration + +class NStages(ICML08Stopper): + """Run for a fixed number of steps, checking validation set every so + often.""" + def __init__(self, hard_limit, v_int): + ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit) + + #TODO: could optimize next() function. 
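A minimal, self-contained use of find_min; the quadratic "model" below is only a stand-in to exercise the stopping logic, not part of the library.

from pylearn.algorithms.stopper import ICML08Stopper

theta = [0.0]                           # toy parameter, walked towards 1.5

def step():
    theta[0] += 0.05

def check():
    return (theta[0] - 1.5) ** 2        # validation score: smaller is better

def save():
    return theta[0]                     # snapshot of the best parameter so far

stopper = ICML08Stopper(i_wait=30, v_int=5, min_improvement=0.99,
                        patience=2.0, hard_limit=500)
best = stopper.find_min(step, check, save)   # (best_snapshot, iter, score), or None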
Most of what's in ICML08Stopper.next() + #is not necessary + +def geometric_patience(i_wait, v_int, min_improvement, patience, hard_limit): + return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit) + +def nstages(hard_limit, v_int): + return ICML08Stopper(hard_limit, v_int, 1.0, 1.0, hard_limit) + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_aa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_aa.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,51 @@ +#from __future__ import absolute_imports + +from pylearn.algorithms import aa as models +import theano +import numpy +import time + + +def test_train(mode = theano.Mode('c|py', 'fast_run')): + + aa = models.SigmoidXEAutoEncoder(regularize = False) +# print aa.update.pretty(mode = theano.Mode('py', 'fast_run').excluding('inplace')) + + model = aa.make(lr = 0.01, + input_size = 100, + hidden_size = 1000, + mode = mode) + + data = [[0, 1, 0, 0, 1, 1, 1, 0, 1, 0]*10]*10 + #data = numpy.random.rand(10, 100) + + t1 = time.time() + for i in xrange(1001): + cost = model.update(data) + if i % 100 == 0: + print i, cost + t2 = time.time() + return t2 - t1 + +if __name__ == '__main__': + numpy.random.seed(10) + print 'sanity check:' + t1 = test_train('SANITY_CHECK') +# t1 = test_train([theano.Mode('c|py', 'fast_compile'), +# theano.Mode('c|py', 'fast_run')]) + print 'time:',t1 + print + + numpy.random.seed(10) + print 'optimized:' + t1 = test_train(theano.Mode('c|py', 'fast_run')) + print 'time:',t1 + print + + numpy.random.seed(10) + print 'not optimized:' + t2 = test_train(theano.Mode('c|py', 'fast_compile')) + print 'time:',t2 + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_daa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_daa.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,90 @@ +#!/usr/bin/python + +from pylearn import algorithms as models +import theano +import numpy +import time + +import pylearn.algorithms.logistic_regression + +def test_train_daa(mode = theano.Mode('c|py', 'fast_run')): + + ndaa = 3 + daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(models.BinRegressor, 'output')], + regularize = False) + + model = daa.make([4, 20, 20, 20, 1], + lr = 0.01, + mode = mode, + seed = 10) + + model.layers[0].noise_level = 0.3 + model.layers[1].noise_level = 0.3 + model.layers[2].noise_level = 0.3 + + # Update the first hidden layer + for l in range(3): + for i in range(10): + model.local_update[l]([[0, 1, 0, 1]]) + model.local_update[l]([[1, 0, 1, 0]]) + + for i in range(10): + model.update([[0, 1, 0, 1]], [[1]]) + model.update([[1, 0, 1, 0]], [[0]]) + print model.classify([[0, 1, 0, 1]]) + print model.classify([[1, 0, 1, 0]]) + + +def test_train_daa2(mode = theano.Mode('c|py', 'fast_run')): + + ndaa = 3 + daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(pylearn.algorithms.logistic_regression.Module_Nclass, 'pred')], + regularize = False) + + model = daa.make([4] + [20] * ndaa + [10], + lr = 0.01, + mode = mode, + seed = 10) + + for l in range(ndaa): model.layers[l].noise_level = 0.3 + + instances = [([[0, 1, 0, 1]], [1]), ([[1, 0, 1, 0]], [0])] + + for l in range(ndaa): + for i in range(10): + for (input, output) in instances: + model.local_update[l](input) + + for i in range(10): + for (input, output) in instances: +# model.update(input, output) + print "OLD:", + print model.validate(input, output) + oldloss = model.update(input, output) + print oldloss + print "NEW:" 
+ print model.validate(input, output) + print + + print model.apply([[0, 1, 0, 1]]) + print model.apply([[1, 0, 1, 0]]) + + + + +if __name__ == '__main__': +# print 'optimized:' +# t1 = test_train_daa(theano.Mode('py', 'fast_compile')) +# t1 = test_train_daa(theano.Mode('c|py', 'fast_run')) +# print 'time:',t1 +# print + +# print 'not optimized:' +# t2 = test_train_daa(theano.Mode('c|py', 'fast_compile')) +## print 'time:',t2 + +# test_train_daa(theano.compile.Mode('c&py', 'merge')) +# test_train_daa(theano.compile.Mode('c|py', 'merge')) + test_train_daa(theano.compile.Mode('py', 'merge')) + + test_train_daa2(theano.compile.Mode('c|py', 'merge')) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_linear_regression.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_linear_regression.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,38 @@ + +import unittest +from pylearn.algorithms.linear_regression import * +from make_test_datasets import * +import numpy + +class test_linear_regression(unittest.TestCase): + + def test1(self): + trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3, + n_targets=2, + n_examples=100, + f=linear_predictor) + + assert trainset.fields()['input'].shape==(50,3) + assert testset.fields()['target'].shape==(50,2) + regressor = LinearRegression(L2_regularizer=0.1) + predictor = regressor(trainset) + test_data = testset.fields() + mse = predictor.compute_mse(test_data['input'],test_data['target']) + print 'mse = ',mse + +if __name__ == '__main__': + import sys + + if len(sys.argv)==1: + unittest.main() + else: + assert sys.argv[1]=="--debug" + tests = [] + for arg in sys.argv[2:]: + tests.append(arg) + if tests: + unittest.TestSuite(map(T_DataSet, tests)).debug() + else: + module = __import__("_test_linear_regression") + tests = unittest.TestLoader().loadTestsFromModule(module) + tests.debug() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_logistic_regression.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_logistic_regression.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,60 @@ +from pylearn.algorithms.logistic_regression import * +import sys, time + +if __name__ == '__main__': + pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx')) + pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax')) + if 1: + lrc = Module_Nclass() + + print '================' + print lrc.update.pretty() + print '================' + print lrc.update.pretty(mode = theano.Mode('py', 'fast_run')) + print '================' +# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace')) +# print '================' + +# sys.exit(0) + + lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run')) + #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run')) + #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN') + + data_x = N.random.randn(5, 10) + data_y = (N.random.randn(5) > 0) + + t = time.time() + for i in xrange(10000): + lr.lr = 0.02 + xe = lr.update(data_x, data_y) + #if i % 100 == 0: + # print i, xe + + print 'training time:', time.time() - t + print 'final error', xe + + #print + #print 'TRAINED MODEL:' + #print lr + + if 0: + lrc = Module() + + lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN') + + data_x = N.random.randn(5, 10) + data_y = (N.random.randn(5, 1) > 0) + + for i in xrange(10000): + xe = lr.update(data_x, data_y) + if 
i % 100 == 0: + print i, xe + + print + print 'TRAINED MODEL:' + print lr + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_regressor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_regressor.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,46 @@ + + +import pylearn.algorithms as models +import theano +import numpy +import time + + +def test_train(mode = theano.Mode('c|py', 'fast_run')): + + reg = models.BinRegressor(regularize = False) + + model = reg.make(lr = 0.01, + input_size = 100, + mode = mode, + seed = 10) + +# data = [[0, 1, 0, 0, 1, 1, 1, 0, 1, 0]*10]*10 +# targets = [[1]]*10 + #data = numpy.random.rand(10, 100) + + R = numpy.random.RandomState(100) + t1 = time.time() + for i in xrange(1001): + data = R.random_integers(0, 1, size = (10, 100)) + targets = data[:, 6].reshape((10, 1)) + cost = model.update(data, targets) + if i % 100 == 0: + print i, '\t', cost, '\t', 1*(targets.T == model.classify(data).T) + t2 = time.time() + return t2 - t1 + +if __name__ == '__main__': + print 'optimized:' + t1 = test_train(theano.Mode('c|py', 'fast_run')) + print 'time:',t1 + print + + print 'not optimized:' + t2 = test_train(theano.Mode('c|py', 'fast_compile')) + print 'time:',t2 + + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_sgd.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_sgd.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,68 @@ +import theano +from pylearn.algorithms import sgd + +def test_sgd0(): + + x = theano.tensor.dscalar('x') + y = theano.tensor.dscalar('y') + + M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01) + M.y = y + m = M.make() + m.y = 5.0 + for i in xrange(100): + c = m.step_cost(3.0) + # print c, m.y + + assert c < 1.0e-5 + assert abs(m.y - (1.0 / 3)) < 1.0e-4 + +def test_sgd_stepsize_variable(): + + x = theano.tensor.dscalar('x') + y = theano.tensor.dscalar('y') + lr = theano.tensor.dscalar('lr') + + M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr) + M.y = y + M.lr = lr + m = M.make() + m.y = 5.0 + m.lr = 0.01 + for i in xrange(100): + c = m.step_cost(3.0) + # print c, m.y + + assert c < 1.0e-5 + assert abs(m.y - (1.0 / 3)) < 1.0e-4 + + + #test that changing the lr has impact + + m.y = 5.0 + m.lr = 0.0 + for i in xrange(10): + c = m.step_cost(3.0) + # print c, m.y + + assert m.y == 5.0 + +def test_sgd_stepsize_none(): + + x = theano.tensor.dscalar('x') + y = theano.tensor.dscalar('y') + + M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y]) + M.y = y + m = M.make() + m.y = 5.0 + #there should be a learning rate here by default + assert m.stepsize is None + m.stepsize = 0.01 + for i in xrange(100): + c = m.step_cost(3.0) + # print c, m.y + + assert c < 1.0e-5 + assert abs(m.y - (1.0 / 3)) < 1.0e-4 + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/tests/test_stacker.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_stacker.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,43 @@ + +import pylearn.algorithms as models +import theano +import numpy +import time + + +def test_train(mode = theano.Mode('c|py', 'fast_run')): + + reg = models.Stacker([(models.BinRegressor, 'output'), (models.BinRegressor, 'output')], + regularize = False) + #print reg.global_update[1].pretty(mode = mode.excluding('inplace')) + + model = reg.make([100, 200, 1], + lr = 0.01, + mode = mode, + seed = 10) + + R = numpy.random.RandomState(100) + t1 = time.time() + for i in xrange(1001): + 
data = R.random_integers(0, 1, size = (10, 100)) + targets = data[:, 6].reshape((10, 1)) + cost = model.update(data, targets) + if i % 100 == 0: + print i, '\t', cost, '\t', 1*(targets.T == model.classify(data).T) + t2 = time.time() + return t2 - t1 + +if __name__ == '__main__': + print 'optimized:' + t1 = test_train(theano.Mode('c|py', 'fast_run')) + print 'time:',t1 + print + + print 'not optimized:' + t2 = test_train(theano.Mode('c|py', 'fast_compile')) + print 'time:',t2 + + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/algorithms/weights.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/weights.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,40 @@ +""" +Routine to initialize weights. + +@note: We assume that numpy.random.seed() has already been performed. +""" + +from math import pow, sqrt +import numpy.random + +sqrt3 = sqrt(3.0) +def random_weights(nin, nout, scale_by=1./sqrt3, power=0.5): + """ + Generate an initial weight matrix with nin inputs (rows) and nout + outputs (cols). + Each weight is chosen uniformly at random to be in range: + [-scale_by*sqrt(3)/pow(nin,power), +scale_by*sqrt(3)/pow(nin,power)] + @note: Play with scale_by, but reasonable values are <=1, maybe 1./sqrt3 + power=0.5 is strongly recommended (see below). + + Suppose these weights w are used in dot products as follows: + output = w' input + If w ~ Uniform(-r,r) and Var[input_i]=1 and the input_i's are independent, then + Var[w] = r^2/3 + Var[output] = Var[ sum_{i=1}^d w_i input_i] = d r^2 / 3 + To make sure that variance is not changed after the dot product, + we therefore want Var[output]=1 and r = sqrt(3)/sqrt(d). This choice + corresponds to scale_by=1 and power=0.5 (note that the default here is scale_by=1./sqrt3; see below). + More generally we see that Var[output] = Var[input] * scale_by^2. + + Now, if these are weights in a deep multi-layer neural network, + we would like the top layers to be initially more linear, so as to let + gradients flow back more easily (this is an explanation by Ronan Collobert). + To achieve this we want scale_by smaller than 1. + Ronan used scale_by=1/sqrt(3) (by mistake!) and got better results than scale_by=1 + in the experiment of his ICML'2008 paper. + Note that if we have a multi-layer network, ignoring the effect of the tanh non-linearity, + the variance of the layer outputs would go down roughly by a factor scale_by^2 at each + layer (making the layers more linear as we go up towards the output). + """ + return (numpy.random.rand(nin, nout) * 2.0 - 1) * scale_by * sqrt3 / pow(nin,power) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/MNIST.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/MNIST.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,81 @@ +""" +Various routines to load/access MNIST data. +""" +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root # config +from .dataset import Dataset + +def head(n=10, path=None): + """Load the first N MNIST examples. + + Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the + 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] + is the label of the i'th row of x.
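# A quick numerical check of the variance argument in random_weights() above, as a
# minimal sketch using only numpy; the layer sizes and sample count are arbitrary
# choices for illustration, not values used anywhere in pylearn.
import numpy

nin, nout, scale_by, power = 500, 200, 1.0, 0.5
r = scale_by * numpy.sqrt(3.0) / nin ** power        # half-width of the uniform range
W = (numpy.random.rand(nin, nout) * 2.0 - 1.0) * r   # same recipe as random_weights()
x = numpy.random.randn(5000, nin)                    # inputs with Var[input_i] = 1
out = numpy.dot(x, W)
# empirical Var[output] should be close to scale_by**2 * Var[input] = 1.0 here
print numpy.var(out), scale_by ** 2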
+ + """ + path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path + + dat = AMat(path=path, head=n) + + try: + assert dat.input.shape[0] == n + assert dat.target.shape[0] == n + except Exception , e: + raise Exception("failed to read MNIST data", (dat, e)) + + return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) + +def all(path=None): + return head(n=None, path=path) + +def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): + all_x, all_targ = head(ntrain+nvalid+ntest, path=path) + + rval = Dataset() + + rval.train = Dataset.Obj(x=all_x[0:ntrain], + y=all_targ[0:ntrain]) + rval.valid = Dataset.Obj(x=all_x[ntrain:ntrain+nvalid], + y=all_targ[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=all_x[ntrain+nvalid:ntrain+nvalid+ntest], + y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) + + rval.n_classes = 10 + rval.img_shape = (28,28) + return rval + + +def full(): + return train_valid_test() + +#usefull for test, keep it +def first_10(): + return train_valid_test(ntrain=10, nvalid=10, ntest=10) + +#usefull for test, keep it +def first_100(): + return train_valid_test(ntrain=100, nvalid=100, ntest=100) + +def first_1k(): + return train_valid_test(ntrain=1000, nvalid=200, ntest=200) + +def first_10k(): + return train_valid_test(ntrain=10000, nvalid=2000, ntest=2000) + +#old method from factory idea days... delete when ready -JB20090119 +def mnist_factory(variant="", ntrain=None, nvalid=None, ntest=None): + if variant=="": + return full() + elif variant=="1k": + return first_1k() + elif variant=="10k": + return first_10k() + elif variant=="custom": + return train_valid_test(ntrain=ntrain, nvalid=nvalid, ntest=ntest) + else: + raise Exception('Unknown MNIST variant', variant) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/__init__.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,1 @@ +from dataset import make_dataset, Dataset diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/config.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/config.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,18 @@ +"""Configuration options for datasets + + +Especially, the locations of data files. +""" + +import os, sys +def env_get(key, default, key2 = None): + if key2 and os.getenv(key) is None: + key=key2 + if os.getenv(key) is None: + print >> sys.stderr, "WARNING: Environment variable", key, + print >> sys.stderr, "is not set. Using default of", default + return default if os.getenv(key) is None else os.getenv(key) + +def data_root(): + return env_get('PYLEARN_DATA_ROOT', os.getenv('HOME')+'/data', 'DBPATH') + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/dataset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/dataset.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,120 @@ +"""The dataset-from-descriptor mechanism.""" + +_datasets = {} + +def add_dataset_factory(family, fn): + """Add `fn` as the handler for descriptors whose first token is `family`. + + :returns: None + + """ + if family in _datasets: + raise Exception('dataset identifier already in use:', family) + else: + _datasets[family] = fn + +def dataset_factory(family): + """Register a function as the handler for a given kind of dataset, identified by `family`. 
+ + When someone calls dataset_from_descr('kind_of_dataset option1 option2, etc.', approx=1), + then the handler registered for 'kind_of_dataset' will be called with the same arguments as + dataset_from_descr. + + .. code-block:: python + + @dataset_factory('MNIST') + def mnist_related_dataset(descr, **kwargs): + ... + + :returns: `decorator` + """ + def decorator(fn): + add_dataset_factory(family, fn) + return fn + return decorator + +def make_dataset(family, **kwargs): + """Return the dataset registered under `family`, constructed with the given keyword arguments. + + :param family: a dataset family identifier + :type family: str + :returns: `Dataset` + + """ + return _datasets[family](**kwargs) + + +class Dataset(object): + class Obj(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + """Dataset is a generic container for pylearn datasets. + + It is not intended to put any restriction whatsoever on its contents. + + It is intended to encourage certain conventions, described below. Conventions should arise + naturally among datasets in PyLearn. When a few datasets adhere to a new convention, then + describe it here and make it more official. + + If no particular convention applies, create your own object to store the dataset, and + assign it to the `data` attribute. + """ + data = None + + """ + SIMPLE REGRESSION / CLASSIFICATION + ---------------------------------- + + In this setting, you are aiming to do vector classification or vector regression + where your train, valid and test sets fit in memory. + The convention is to put your data into numpy ndarray instances. Put training data in the + `train` attribute, validation data in the `valid` attribute and test data in the `test` + attribute. + Each of those attributes should be an instance that defines at least two attributes: `x` for the + input matrix and `y` for the target matrix. The `x` ndarray should be one example per + leading index (row for matrices). + The `y` ndarray should be one target per leading index (entry for vectors, row for matrices). + If `y` is a classification target, then it should be a vector with numpy dtype 'int32'. + + If there are weights associated with different examples, then create a 'weights' attribute whose + value is a vector with one floating-point value (typically double-precision) per example. + + If the task is classification, then the classes should be mapped to the integers + 0,1,...,N-1. + The number of classes (here, N) should be stored in the `n_classes` attribute. + + """ + train = None #instance with .x, .y + + valid = None #instance with .x, .y + + test = None #instance with .x, .y + + n_classes = None #int + + """ + WHEN INPUTS ARE FIXED-SIZE GREYSCALE IMAGES + ------------------------------------------- + + In this setting we typically encode images as vectors, by enumerating the pixel values in + left-to-right, top-to-bottom order. Pixel values should be in floating-point, and + normalized between 0 and 1. + + The shape of the images should be recorded in the `img_shape` attribute as a tuple (rows, + cols). + + """ + + img_shape = None # (rows, cols) + + + """ + TIMESERIES + ---------- + + When dealing with examples which are themselves timeseries, put each example timeseries in a + tensor and make a list of them.
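# A minimal sketch of the simple classification convention described above, using the
# Dataset and Dataset.Obj containers from this file; the random arrays stand in for
# real data and assume the pylearn package is importable.
import numpy
from pylearn.datasets.dataset import Dataset

rng = numpy.random.RandomState(0)
ds = Dataset()
ds.train = Dataset.Obj(x=rng.rand(80, 5), y=rng.randint(0, 3, 80).astype('int32'))
ds.valid = Dataset.Obj(x=rng.rand(10, 5), y=rng.randint(0, 3, 10).astype('int32'))
ds.test = Dataset.Obj(x=rng.rand(10, 5), y=rng.randint(0, 3, 10).astype('int32'))
ds.n_classes = 3
print ds.train.x.shape, ds.train.y.dtype, ds.n_classes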
Generally use tensors, and resort to lists or arrays + wherever different + """ + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/README.txt Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,3 @@ +Messy scripts for working with Jason + Ronan's embeddings. + +Parameters are given in parameters.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/__init__.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,1 @@ +from process import * diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/convert.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/convert.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,15 @@ +#!/usr/bin/python +""" +Convert stdin sentences to word embeddings, and output YAML. +""" + +import sys, string +import read +import yaml + +output = [] +for l in sys.stdin: + l = string.strip(l) + output.append((l, read.convert_string(l))) + +print yaml.dump(output) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/one-per-line.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/one-per-line.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,27 @@ +#!/usr/bin/python + +import string +#import psyco + +weightsfile = "lm-weights.txt" +vocabfile = "words.asc" +size = 30000 +dimensions = 50 + +import numpy, math +import sys +from percent import percent + +word_to_vector = {} + +f = open(weightsfile) +f.readline() +vals = [float(v) for v in string.split(f.readline())] +assert len(vals) == size * dimensions +vals.reverse() +#for i in range(size): +r = range(size) +r.reverse() +for i in r: + l = vals[dimensions*i:dimensions*(i+1)] + print string.join([`s` for s in l], "\t") diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/parameters.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,10 @@ +""" +Locations of the embedding data files. 
+""" +WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt" +VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc" +#WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt" +#VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc" +NUMBER_OF_WORDS = 30000 +DIMENSIONS = 50 +UNKNOWN = "UNKNOWN" diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/percent.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/percent.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,9 @@ +def percent(a, b): + """ + Return percentage string of a and b, e.g.: + "1 of 10 (10%)" + """ + assert a <= b + assert a >= 0 + assert b > 0 + return "%s of %s (%.2f%%)" % (a, b, 100.*a/b) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/process.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/process.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,136 @@ +""" +Read in the weights file +""" + +import string +import sys + +from parameters import * + +__words = None +__word_to_embedding = None +__read = False + +def length(): + """ + @return: The length of embeddings + """ + return len(__word_to_embedding[__words[0]]) + +def word_to_embedding(w): + read_embeddings() + return __word_to_embedding[w] + +def read_embeddings(): + global __words + global __word_to_embedding + global __read + if __read: return + + __words = [string.strip(w) for w in open(VOCABFILE).readlines()] + assert len(__words) == NUMBER_OF_WORDS + + import numpy, math + from percent import percent + + __word_to_embedding = {} + + sys.stderr.write("Reading %s...\n" % WEIGHTSFILE) + f = open(WEIGHTSFILE) + f.readline() + vals = [float(v) for v in string.split(f.readline())] + assert len(vals) == NUMBER_OF_WORDS * DIMENSIONS + for i in range(NUMBER_OF_WORDS): + l = vals[DIMENSIONS*i:DIMENSIONS*(i+1)] + w = __words[i] + __word_to_embedding[w] = l + __read = True + for w in __word_to_embedding: assert len(__word_to_embedding[__words[0]]) == len(__word_to_embedding[w]) + sys.stderr.write("...done reading %s\n" % WEIGHTSFILE) + +import re +numberre = re.compile("[0-9]") +slashre = re.compile("\\\/") + +def preprocess_word(origw): + """ + Convert a word so that it can be embedded directly. + Returned the preprocessed sequence. + @note: Preprocessing is appropriate for Penn Treebank style documents. + #@note: Perhaps run L{common.penntreebank.preprocess} on the word first. + """ + read_embeddings() + if origw == "-LRB-": w = "(" + elif origw == "-RRB-": w = ")" + elif origw == "-LCB-": w = "{" + elif origw == "-RCB-": w = "}" + elif origw == "-LSB-": w = "[" + elif origw == "-RSB-": w = "]" + else: + w = origw + if w not in __word_to_embedding: + w = string.lower(w) + w = slashre.sub("/", w) + w = numberre.sub("NUMBER", w) +# if w not in __word_to_embedding: +# w = string.lower(w) +# w = numberre.sub("NUMBER", w) + if w not in __word_to_embedding: +# sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) + w = UNKNOWN + assert w in __word_to_embedding + return w + +def preprocess_seq(l): + """ + Convert a sequence so that it can be embedded directly. + Returned the preprocessed sequence. + @note: Preprocessing is appropriate for Penn Treebank style documents. 
+ """ + read_embeddings() + lnew = [] + for origw in l: + w = preprocess_word(origw) + lnew.append(w) + return lnew + +#def convert_string(s, strict=False): +# """ +# Convert a string to a sequence of embeddings. +# @param strict: If strict, then words *must* be in the vocabulary. +# @todo: DEPRECATED Remove this function. +# """ +# read_embeddings() +# e = [] +# for origw in string.split(string.lower(s)): +# w = numberre.sub("NUMBER", origw) +# if w in __word_to_embedding: +# e.append(__word_to_embedding[w]) +# else: +# sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) +# assert not strict +# e.append(__word_to_embedding[UNKNOWN]) +# return e + +#def test(): +# """ +# Debugging code. +# """ +# read_embeddings() +# for w in __word_to_embedding: +# assert len(__word_to_embedding[w]) == 50 +# import numpy +# for w1 in __words: +# e1 = numpy.asarray(__word_to_embedding[w1]) +# lst = [] +# print w1, numpy.dot(e1, e1) +# for w2 in __word_to_embedding: +# if w1 >= w2: continue +# e2 = numpy.asarray(__word_to_embedding[w2]) +# d = (e1 - e2) +# l2 = numpy.dot(d, d) +# lst.append((l2, w1, w2)) +# lst.sort() +# print lst[:10] +# +#test() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/embeddings/read-original.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/embeddings/read-original.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,47 @@ +#!/usr/bin/python + +import string +#import psyco + +weightsfile = "lm-weights.txt" +vocabfile = "words.asc" +size = 30000 +dimensions = 50 + +words = [string.strip(w) for w in open(vocabfile).readlines()] +assert len(words) == 30000 + +import numpy, math +import sys +from percent import percent + +word_to_vector = {} + +f = open(weightsfile) +f.readline() +vals = [float(v) for v in string.split(f.readline())] +assert len(vals) == size * dimensions +vals.reverse() +for i in range(size): + l = vals[dimensions*i:dimensions*(i+1)] + w = words[i] + word_to_vector[w] = l + +# l2 = numpy.asarray(l) +# print math.fabs(50 - numpy.sum(l2*l2)), w + +cnt = 0 +for i1 in range(len(words)): + for i2 in range(len(words)): + w1 = words[i1] + w2 = words[i2] + cnt += 1 + if i1 <= i2: continue + l1 = numpy.asarray(word_to_vector[w1]) + l2 = numpy.asarray(word_to_vector[w2]) + d = l2 - l1 + dist = numpy.sum(d * d) + if dist < 50: + print numpy.sum(d * d), w1, w2, i1, i2 + if cnt % 1000 == 0: + sys.stderr.write("%s done...\n" % percent(cnt, len(word_to_vector) * len(word_to_vector))) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/flickr.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/flickr.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,71 @@ +""" +Routines to load variations on the Flickr image dataset. +""" +from __future__ import absolute_import + +import os +import numpy + +from ..io import filetensor +from .config import data_root +from .dataset import Dataset + + +path_test_10class ='flickr_10classes_test.ft' + +path_train_10class = 'flickr_10classes_train.ft' + +path_valid_10class = 'flickr_10classes_valid.ft' + +def basic_10class(folder = None): + """Return the basic flickr image classification problem. + The images are 75x75, and there are 7500 training examples. 
+ """ + root = os.path.join(data_root(), 'flickr') if folder is None else folder + train = filetensor.read(open(os.path.join(root, path_train_10class))) + valid = filetensor.read(open(os.path.join(root, path_valid_10class))) + test = filetensor.read(open(os.path.join(root, path_test_10class))) + + assert train.shape[1] == 75*75 +1 + assert valid.shape[1] == 75*75 +1 + assert test.shape[1] == 75*75 +1 + + rval = Dataset() + + rval.train = Dataset.Obj( + x=train[:, 0:-1], + y=numpy.asarray(train[:, -1], dtype='int64')) + rval.valid = Dataset.Obj( + x=valid[:, 0:-1], + y=numpy.asarray(valid[:, -1], dtype='int64')) + rval.test = Dataset.Obj( + x=test[:, 0:-1], + y=numpy.asarray(test[:, -1], dtype='int64')) + + rval.n_classes = 10 + rval.img_shape = (75,75) + + return rval + +def translations_10class(): + raise NotImplementedError('TODO') + + +def render_a_few_images(n=10, prefix='flickr_img', suffix='png'): + #TODO: document this and move it to a more common + # place where other datasets can use it + from PIL import Image + root = os.path.join(data_root(), 'flickr') + valid = filetensor.read(open(os.path.join(root, path_valid_10class))) + assert valid.shape == (1000,75*75+1) + for i in xrange(n): + pixelarray = valid[i,0:-1].reshape((75,75)).T + assert numpy.all(pixelarray >= 0) + assert numpy.all(pixelarray <= 1) + + pixel_uint8 = numpy.asarray( pixelarray * 255.0, dtype='uint8') + im = Image.frombuffer('L', pixel_uint8.shape, pixel_uint8.data, 'raw', 'L', 0, 1) + im.save(prefix + str(i) + '.' + suffix) + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/make_test_datasets.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/make_test_datasets.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,118 @@ +from dataset import ArrayDataSet +from shapeset.dset import Polygons +from linear_regression import linear_predictor +from kernel_regression import kernel_predictor +from numpy import * + +""" +General-purpose code to generate artificial datasets that can be used +to test different learning algorithms. +""" + + +def make_triangles_rectangles_online_dataset(image_size=(10,10)): + """ + Make a binary classification dataset to discriminate triangle images from rectangle images. + """ + def convert_dataset(dset): + # convert the n_vert==3 into target==0 and n_vert==4 into target==1 + def mapf(images,n_vertices): + n=len(n_vertices) + targets = ndarray((n,1),dtype='float64') + for i in xrange(n): + targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') + return images.reshape(len(images),images[0].size).astype('float64'),targets + return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]) + + p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) + trainset=convert_dataset(p) + return trainset + + +def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True): + """ + Make a binary classification dataset to discriminate triangle images from rectangle images. + """ + def convert_dataset(dset): + # convert the n_vert==3 into target==0 and n_vert==4 into target==1 + def mapf(images,n_vertices): + n=len(n_vertices) + targets = ndarray((n,1),dtype='float64') + for i in xrange(n): + targets[i,0] = array([0. 
if n_vertices[i]==3 else 1.],dtype='float64') + return images.reshape(len(images),images[0].size).astype('float64'),targets + return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache) + + p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) + data = p.subset[0:n_examples] + trainset=convert_dataset(data.subset[0:n_examples]) + return trainset + + +def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True): + """ + Make two binary classification datasets to discriminate triangle images from rectangle images. + The first one is the training set, the second is the test set. + """ + data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache) + n_train = int(n_examples*train_frac) + trainset=convert_dataset(data.subset[0:n_train]) + testset=convert_dataset(data.subset[n_train:n_examples]) + return trainset,testset + + +def make_artificial_datasets_from_function(n_inputs=1, + n_targets=1, + n_examples=20, + train_frac=0.5, + noise_level=0.1, # add Gaussian noise, noise_level=sigma + params_shape=None, + f=None, # function computing E[Y|X] + otherargs=None, # extra args to f + b=None): # force theta[0] with this value + """ + Make regression data of the form + Y | X ~ Normal(f(X,theta,otherargs),noise_level^2) + If n_inputs==1 then X is chosen at regular locations on the [-1,1] interval. + Otherwise X is sampled according to a Normal(0,1) on all dimensions (independently). + The parameters theta is a matrix of shape params_shape that is sampled from Normal(0,1). + Optionally theta[0] is set to the argument 'b', if b is provided. + + Return a training set and a test set, by splitting the generated n_examples + according to the 'train_frac'tion. 
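# The generative process described in the docstring above, written out inline for the
# linear case as a hedged sketch: it does not call linear_predictor (whose exact calling
# convention is defined elsewhere); theta[0] is simply taken as the bias row here.
import numpy

n_examples, n_inputs, n_targets, noise_level = 20, 3, 1, 0.1
X = numpy.random.normal(size=(n_examples, n_inputs))
theta = numpy.random.normal(size=(n_inputs + 1, n_targets))   # theta[0] acts as the bias
Y = numpy.dot(X, theta[1:]) + theta[0] \
    + numpy.random.normal(scale=noise_level, size=(n_examples, n_targets))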
+ """ + n_train=int(train_frac*n_examples) + n_test=n_examples-n_train + if n_inputs==1: + delta1=2./n_train + delta2=2./n_test + inputs = vstack((array(zip(range(n_train)))*delta1-1, + 0.5*delta2+array(zip(range(n_test)))*delta2-1)) + else: + inputs = random.normal(size=(n_examples,n_inputs)) + if not f: + f = linear_predictor + if f==kernel_predictor and not otherargs[1]: + otherargs=(otherargs[0],inputs[0:n_train]) + if not params_shape: + if f==linear_predictor: + params_shape = (n_inputs+1,n_targets) + elif f==kernel_predictor: + params_shape = (otherargs[1].shape[0]+1,n_targets) + theta = random.normal(size=params_shape) if params_shape else None + if b: + theta[0]=b + outputs = f(inputs,theta,otherargs) + targets = outputs + random.normal(scale=noise_level,size=(n_examples,n_targets)) + # the | stacking creates a strange bug in LookupList constructor: + # trainset = ArrayDataSet(inputs[0:n_examples/2],{'input':slice(0,n_inputs)}) | \ + # ArrayDataSet(targets[0:n_examples/2],{'target':slice(0,n_targets)}) + # testset = ArrayDataSet(inputs[n_examples/2:],{'input':slice(0,n_inputs)}) | \ + # ArrayDataSet(targets[n_examples/2:],{'target':slice(0,n_targets)}) + data = hstack((inputs,targets)) + + trainset = ArrayDataSet(data[0:n_train], + {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)}) + testset = ArrayDataSet(data[n_train:], + {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)}) + return trainset,testset,theta diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/shapeset1.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/shapeset1.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,77 @@ +""" +Routines to load/access Shapeset1 +""" + +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root +from .dataset import Dataset + +def _head(path, n): + dat = AMat(path=path, head=n) + + try: + assert dat.input.shape[0] == n + assert dat.target.shape[0] == n + except Exception , e: + raise Exception("failed to read %i lines from file %s" % (n, path)) + + return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) + + +def head_train(n=10000): + """Load the first Shapeset1 training examples. + + Returns two matrices: x, y. + x has N rows of 1024 columns. + Each row of x represents the 32x32 grey-scale pixels in raster order. + y is a vector of N integers between 0 and 2. + Each element y[i] is the label of the i'th row of x. + """ + path = os.path.join(data_root(), 'shapeset1','shapeset1_1cspo_2_3.10000.train.shape.amat') + return _head(path, n) + +def head_valid(n=5000): + """Load the first Shapeset1 validation examples. + + Returns two matrices: x, y. + x has N rows of 1024 columns. + Each row of x represents the 32x32 grey-scale pixels in raster order. + y is a vector of N integers between 0 and 2. + Each element y[i] is the label of the i'th row of x. + """ + path = os.path.join(data_root(), 'shapeset1','shapeset1_1cspo_2_3.5000.valid.shape.amat') + return _head(path, n) + +def head_test(n=5000): + """Load the first Shapeset1 testing examples. + + Returns two matrices: x, y. + x has N rows of 1024 columns. + Each row of x represents the 32x32 grey-scale pixels in raster order. + y is a vector of N integers between 0 and 2. + Each element y[i] is the label of the i'th row of x. 
+ """ + path = os.path.join(data_root(), 'shapeset1','shapeset1_1cspo_2_3.5000.test.shape.amat') + return _head(path, n) + +def train_valid_test(ntrain=10000, nvalid=5000, ntest=5000): + train_x, train_y = head_train(n=ntrain) + valid_x, valid_y = head_valid(n=nvalid) + test_x, test_y = head_test(n=ntest) + + rval = Dataset() + rval.train = Dataset.Obj(x = train_x, y = train_y) + rval.valid = Dataset.Obj(x = valid_x, y = valid_y) + rval.test = Dataset.Obj(x = test_x, y = test_y) + + rval.n_classes = 3 + rval.img_shape = (32, 32) + + return rval + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/smallNorb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/smallNorb.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,108 @@ +import os +import numpy +from ..io.filetensor import read +from .config import data_root + +#Path = '/u/bergstrj/pub/data/smallnorb' +#Path = '/home/fringant2/lisa/louradoj/data/smallnorb' +#Path = '/home/louradou/data/norb' + +class Paths(object): + """File-related operations on smallNorb + """ + def __init__(self): + smallnorb = [data_root(), 'smallnorb'] + self.train_dat = os.path.join(*\ + smallnorb + ['smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat']) + self.test_dat = os.path.join(*\ + smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat']) + self.train_cat = os.path.join(*\ + smallnorb + ['smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat']) + self.test_cat = os.path.join(*\ + smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat']) + self.train_info = os.path.join(*\ + smallnorb + ['smallnorb-5x46789x9x18x6x2x96x96-training-info.mat']) + self.test_info = os.path.join(*\ + smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat']) + + def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='uint8'): + """ Load the smallNorb data into numpy matrices. + + normalize_pixels True will divide the values by 255, which makes sense in conjunction + with dtype=float32 or dtype=float64. + + """ + def downsample(dataset): + return dataset[:, 0, ::downsample_amt, ::downsample_amt] + + samples = downsample(read(open(self.train_dat))) + samples = numpy.vstack((samples, downsample(read(open(self.test_dat))))) + samples = numpy.asarray(samples, dtype=dtype) + if normalize_pixels: + samples *= (1.0 / 255.0) + + labels = read(open(self.train_cat)) + labels = numpy.hstack((labels, read(open(self.test_cat)))) + + infos = read(open(self.train_info)) + infos = numpy.vstack((infos, read(open(self.test_info)))) + + return samples, labels, infos + +def smallnorb_iid(ntrain=29160, nvalid=9720, ntest=9720, dtype='float64', normalize_pixels=True): + """Variation of the smallNorb task in which we randomly shuffle all the object instances + together before dividing into train/valid/test. + + The default train/valid/test sizes correspond to 60/20/20 split of the entire dataset. 
+ + :returns: 5, (train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels) + + """ + # cut from /u/louradoj/theano/hpu/expcode1.py + rng = numpy.random.RandomState(1) + samples, labels, infos = Paths().load_append_train_test(downsample_amt=3, dtype=dtype, normalize_pixels=normalize_pixels) + + nsamples = samples.shape[0] + if ntrain + nvalid + ntest > nsamples: + raise Exception("ntrain+nvalid+ntest exceeds number of samples (%i)" % nsamples, + (ntrain, nvalid, ntest)) + i0 = 0 + i1 = ntrain + i2 = ntrain + nvalid + i3 = ntrain + nvalid + ntest + + indices = rng.permutation(nsamples) + train_rows = indices[i0:i1] + valid_rows = indices[i1:i2] + test_rows = indices[i2:i3] + + n_labels = 5 + + def _pick_rows(rows): + a = numpy.array([samples[i].flatten() for i in rows]) + b = numpy.array([labels[i] for i in rows]) + return a, b + + return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)] + +def smallnorb_azSplit(): + # cut from /u/louradoj/theano/hpu/expcode1.py + # WARNING NOT NECESSARILY WORKING CODE + + samples, labels, infos = _load_append_train_test() + train_rows, valid_rows, test_rows = [], [], [] + train_rows_azimuth = [] + for instance in range(10): + az_min = 4*instance + az_max = 4*instance + 18 + train_rows_azimuth.append( [a % 36 for a in range(az_min,az_max,2)] ) + #print "train_rows_azimuth", train_rows_azimuth + for i, info in enumerate(infos): + if info[2] in train_rows_azimuth[info[0]]: + train_rows.append(i) + elif info[2] / 2 % 2 == 0: + test_rows.append(i) + else: + valid_rows.append(i) + + return [_pick_rows(samples, labels, r) for r in (train_rows, valid_rows, test_rows)] diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/testDataset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/testDataset.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,43 @@ +""" +Various routines to load/access MNIST data. 
+""" +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root +from .dataset import dataset_factory, Dataset + +VALSEQ, VALRAND = range(2) + +@dataset_factory('DEBUG') +def mnist_factory(variant='', ntrain=10, nvalid=10, ntest=10, \ + nclass=2, ndim=1, dshape=None, valtype=VALSEQ): + + temp = [] + [temp.append(5) for i in range(ndim)] + dshape = temp if dshape is None else dshape + + rval = Dataset() + rval.n_classes = nclass + rval.img_shape = dshape + + dsize = numpy.prod(dshape); + + print ntrain, nvalid, ntest, nclass, dshape, valtype + + ntot = ntrain + nvalid + ntest + xdata = numpy.arange(ntot*numpy.prod(dshape)).reshape((ntot,dsize)) \ + if valtype is VALSEQ else \ + numpy.random.random((ntot,dsize)); + ydata = numpy.round(numpy.random.random(ntot)); + + rval.train = Dataset.Obj(x=xdata[0:ntrain],y=ydata[0:ntrain]) + rval.valid = Dataset.Obj(x=xdata[ntrain:ntrain+nvalid],\ + y=ydata[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=xdata[ntrain+nvalid:ntrain+nvalid+ntest], + y=ydata[ntrain+nvalid:ntrain+nvalid+ntest]) + + return rval diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/test_tzanetakis.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/test_tzanetakis.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,22 @@ +import theano + +from pylearn.io import wavread +from pylearn.datasets import tzanetakis + +def test_tzanetakis(): + idx = theano.tensor.lscalar() + + path, label = tzanetakis.tzanetakis_example(idx) + print path, label + + f = theano.function([idx], [path, label]) + + for i in xrange(len(tzanetakis.tzanetakis_example)): + print i, f(i) + + wav,sr = wavread.wav_read_int16(path) + + f = theano.function([idx], wav) + for i in xrange(len(tzanetakis.tzanetakis_example)): + print i, f(i).shape + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/datasets/tzanetakis.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/tzanetakis.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,100 @@ +""" +Load Tzanetakis' genre-classification dataset. 
+ +""" +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root +from .dataset import dataset_factory, Dataset + +def centre_data(x, inplace=False): + rval = x if inplace else x.copy() + #zero-mean + rval -= numpy.mean(rval, axis=0) + #unit-variance + rval *= 1.0 / (1.0e-6 + numpy.std(rval, axis=0)) + return rval + +def mfcc16(segments_per_song = 1, include_covariance = True, random_split = 0, + ntrain = 700, nvalid = 100, ntest = 200, + normalize=True): + if segments_per_song != 1: + raise NotImplementedError() + + path = os.path.join(data_root(), 'tzanetakis','feat_mfcc16_540_1.stat.amat') + dat = AMat(path=path) + all_input = dat.input + assert all_input.shape == (1000 * segments_per_song, 152) + all_targ = numpy.tile(numpy.arange(10).reshape(10,1), 100 * segments_per_song)\ + .reshape(1000 * segments_per_song) + + if not include_covariance: + all_input = all_input[:,0:16] + + #shuffle the data according to the random split + assert all_input.shape[0] == all_targ.shape[0] + seed = random_split + 1 + numpy.random.RandomState(seed).shuffle(all_input) + numpy.random.RandomState(seed).shuffle(all_targ) + + #construct a dataset to return + rval = Dataset() + + def prepx(x): + return centre_data(x, inplace=True) if normalize else x + + rval.train = Dataset.Obj(x=prepx(all_input[0:ntrain]), + y=all_targ[0:ntrain]) + rval.valid = Dataset.Obj(x=prepx(all_input[ntrain:ntrain+nvalid]), + y=all_targ[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=prepx(all_input[ntrain+nvalid:ntrain+nvalid+ntest]), + y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) + + rval.n_classes = 10 + + return rval + +import theano + +class TzanetakisExample(theano.Op): + @staticmethod + def read_tzanetakis_file(): + tracklist = open(data_root() + '/tzanetakis/tracklist.txt') + path = [] + label = [] + for line in tracklist: + toks = line.split() + try: + path.append(toks[0]) + label.append(toks[1]) + except: + print 'BAD LINE IN TZANETAKIS TRACKLIST' + print line, toks + raise + assert len(path) == 1000 + return path, label + + def __init__(self): + self.path, self.label = self.read_tzanetakis_file() + + def __len__(self): + return len(self.path) + + def make_node(self, idx): + return theano.Apply(self, + [theano.tensor.as_tensor_variable(idx)], + [theano.generic(), theano.generic()]) + + def perform(self, node, (idx,), outputs): + assert len(outputs) == 2 + outputs[0][0] = self.path[idx] + outputs[1][0] = self.label[idx] + + def grad(self, inputs, g_output): + return [None for i in inputs] +tzanetakis_example = TzanetakisExample() + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/examples/linear_classifier.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/examples/linear_classifier.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,224 @@ +#! /usr/bin/env python +""" +T. Bertin-Mahieux (2008) University of Montreal +bertinmt@iro.umontreal.ca + +linear_classifier.py +Simple script that creates a linear_classifier, and +learns the parameters using backpropagation. + +This is to illustrate how to use theano/pylearn. +Anyone who knows how to make this script simpler/clearer is welcome to +make the modifications. 
+""" + + +import os +import sys +import time +import copy +import pickle +import numpy +import numpy as N +import numpy.random as NR +from pylearn import cost +import theano +from theano import tensor as T + + +def cost_function(*args,**kwargs) : + """ default cost function, quadratic """ + return cost.quadratic(*args,**kwargs) + + +class modelgraph() : + """ class that contains the graph of the model """ + lr = T.scalar() # learning rate + inputs = T.matrix() # inputs (one example per line) + true_outputs = T.matrix() # outputs (one example per line) + W = T.matrix() # weights input * W + b= output + b = T.vector() # bias + outputs = T.dot(inputs,W) + b # output, one per line + costs = cost_function(true_outputs,outputs) # costs + g_W = T.grad(costs,W) # gradient of W + g_b = T.grad(costs,b) # gradient of b + new_W = T.sub_inplace(W, lr * g_W) # update inplace of W + new_b = T.sub_inplace(b, lr * g_b) # update inplace of b + + +class model() : + """ + The model! + Contains needed matrices, needed functions, and a link to the model graph. + """ + + def __init__(self,input_size,output_size) : + """ init matrix and bias, creates the graph, create a dict of compiled functions """ + # graph + self.graph = modelgraph() + # weights and bias, saved in self.params + seed = 666 + r = NR.RandomState(seed) + W = r.uniform(size = [input_size, output_size], low = -1/N.sqrt(input_size), high = 1/N.sqrt(input_size)) + b = numpy.zeros((output_size, )) + self.params = [W,b] + # dictionary of compiled functions + self.func_dict = dict() + # keep some init_infos (may not be necessary) + self.init_params = [input_size,output_size] + + + def update(self,lr,true_inputs,true_outputs) : + """ does an update of the model, one gradient descent """ + # do we already have the proper theano function? + if self.func_dict.has_key('update_func') : + self.func_dict['update_func'](lr,true_inputs,true_outputs,self.params[0],self.params[1]) + return + else : + # create the theano function, tell him what are the inputs and outputs) + func = theano.function([self.graph.lr,self.graph.inputs,self.graph.true_outputs, + self.graph.W, self.graph.b], + [self.graph.new_W,self.graph.new_b]) + # add function to dictionary, so we don't compile it again + self.func_dict['update_func'] = func + # use this function + func(lr,true_inputs,true_outputs,self.params[0],self.params[1]) + return + + def costs(self,true_inputs,true_outputs) : + """ get the costs for given examples, don't update """ + # do we already have the proper theano function? + if self.func_dict.has_key('costs_func') : + return self.func_dict['costs_func'](true_inputs,true_outputs,self.params[0],self.params[1]) + else : + # create the theano function, tell him what are the inputs and outputs) + func = theano.function([self.graph.inputs,self.graph.true_outputs,self.graph.W,self.graph.b], + [self.graph.costs]) + # add function to dictionary, se we don't compile it again + self.func_dict['costs_func'] = func + # use this function + return func(true_inputs,true_outputs,self.params[0],self.params[1]) + + def outputs(self,true_inputs) : + """ get the output for a set of examples (could be called 'predict') """ + # do we already have the proper theano function? 
+ if self.func_dict.has_key('outputs_func') : + return self.func_dict['outputs_func'](true_inputs,self.params[0],self.params[1]) + else : + # create the theano function, tell him what are the inputs and outputs) + func = theano.function([self.graph.inputs, self.graph.W, self.graph.b], + [self.graph.outputs]) + # add function to dictionary, se we don't compile it again + self.func_dict['outputs_func'] = func + # use this function + return func(true_inputs,self.params[0],self.params[1]) + + def __getitem__(self,inputs) : + """ for simplicity, we can use the model this way: predictions = model[inputs] """ + return self.outputs(inputs) + + def __getstate__(self) : + """ + To save/copy the model, used by pickle.dump() and by copy.deepcopy(). + @return a dictionnary with the params (matrix + bias) + """ + d = dict() + d['params'] = self.params + d['init_params'] = self.init_params + return d + + def __setstate__(self,d) : + """ + Get the dictionary created by __getstate__(), use it to recreate the model. + """ + self.params = d['params'] + self.init_params = d['init_params'] + self.graph = modelgraph() # we did not save the model graph + + def __str__(self) : + """ returns a string representing the model """ + res = "Linear regressor, input size =",str(self.init_params[0]) + res += ", output size =", str(self.init_params[1]) + return res + + def __equal__(self,other) : + """ + Compares the model based on the params. + @return True if the params are the same, False otherwise + """ + # class + if not isinstance(other,model) : + return False + # input size + if self.params[0].shape[0] != other.params[0].shape[0] : + return False + # output size + if self.params[0].shape[1] != other.params[0].shape[1] : + return False + # actual values + if not (self.params[0] == other.params[0]).all(): + return False + if not (self.params[1] == other.params[1]).all(): + return False + # all good + return True + + +def die_with_usage() : + """ help menu """ + print 'simple script to illustrate how to use theano/pylearn' + print 'to launch:' + print ' python linear_classifier.py -launch' + sys.exit(0) + + + +#************************************************************ +# main + +if __name__ == '__main__' : + + if len(sys.argv) < 2 : + die_with_usage() + + # print create data + inputs = numpy.array([[.1,.2], + [.2,.8], + [.9,.3], + [.6,.5]]) + outputs = numpy.array([[0], + [0], + [1], + [1]]) + assert inputs.shape[0] == outputs.shape[0] + + # create model + m = model(2,1) + + # predict + print 'prediction before training:' + print m[inputs] + + # update it for 100 iterations + for k in range(50) : + m.update(.1,inputs,outputs) + + # predict + print 'prediction after training:' + print m[inputs] + + # show points + import pylab as P + colors = outputs.flatten().tolist() + x = inputs[:,0] + y = inputs[:,1] + P.plot(x[numpy.where(outputs==0)[0]],y[numpy.where(outputs==0)[0]],'r+') + P.plot(x[numpy.where(outputs==1)[0]],y[numpy.where(outputs==1)[0]],'b+') + # decision line + p1 = (.5 - m.params[1] * 1.) / m.params[0][1,0] # abs = 0 + p2 = (.5 - m.params[1] * 1.) 
/ m.params[0][0,0] # ord = 0 + P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-') + # show + P.axis([-1,2,-1,2]) + P.show() + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/examples/theano_update.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/examples/theano_update.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,56 @@ +import theano +from theano import tensor + +import numpy + +# Two scalar symbolic variables +a = tensor.scalar() +b = tensor.scalar() + +# Definition of output symbolic variable +c = a * b +# Definition of the function computing it +fprop = theano.function([a,b], [c]) + +# Initialize numerical variables +a_val = numpy.array(12.) +b_val = numpy.array(2.) +print 'a_val =', a_val +print 'b_val =', b_val + +# Numerical value of output is returned by the call to "fprop" +c_val = fprop(a_val, b_val) +print 'c_val =', c_val + + +# Definition of simple update (increment by one) +new_b = b + 1 +update = theano.function([b], [new_b]) + +# New numerical value of b is returned by the call to "update" +b_val = update(b_val) +print 'new b_val =', b_val +# We can use the new value in "fprop" +c_val = fprop(a_val, b_val) +print 'c_val =', c_val + + +# Definition of in-place update (increment by one) +re_new_b = tensor.add_inplace(b, 1.) +re_update = theano.function([b], [re_new_b]) + +# "re_update" can be used the same way as "update" +b_val = re_update(b_val) +print 'new b_val =', b_val +# We can use the new value in "fprop" +c_val = fprop(a_val, b_val) +print 'c_val =', c_val + +# It is not necessary to keep the return value when the update is done in place +re_update(b_val) +print 'new b_val =', b_val +c_val = fprop(a_val, b_val) +print 'c_val =', c_val + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/exceptions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/exceptions.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,7 @@ +""" +Common exceptions. +@todo: This file should be part of a common/ python package. +""" + +class AbstractFunction (Exception): """Derived class must override this function""" +class NotImplementedYet (NotImplementedError): """Work in progress, this should eventually be implemented""" diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/external/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/external/wrap_libsvm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/external/wrap_libsvm.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,100 @@ +"""Run an experiment using libsvm. +""" +import numpy +from ..datasets import make_dataset + +# libsvm currently has no python installation instructions/convention. +# +# This module uses a specific convention for libsvm's installation. +# I base this on installing libsvm-2.88. +# To install libsvm's python module, do three things: +# 1. Build libsvm (run make in both the root dir and the python subdir). +# 2. touch a '__init__.py' file in the python subdir +# 3. add a symbolic link to a PYTHONPATH location that looks like this: +# libsvm -> /libsvm-2.88/python/ +# +# That is the sort of thing that this module expects from 'import libsvm' + +import libsvm + +def score_01(x, y, model): + assert len(x) == len(y) + size = len(x) + errors = 0 + for i in range(size): + prediction = model.predict(x[i]) + #probability = model.predict_probability + if (y[i] != prediction): + errors = errors + 1 + return float(errors)/size + +#this is the dbdict experiment interface... 
if you happen to use dbdict +class State(object): + #TODO: parametrize to get all the kernel types, not hardcode for RBF + dataset = 'MNIST_1k' + C = 10.0 + kernel = 'RBF' + # rel_gamma is related to the procedure Jerome used. He mentioned why in + # quadratic_neurons/neuropaper/draft3.pdf. + rel_gamma = 1.0 + + def __init__(self, **kwargs): + for k, v in kwargs: + setattr(self, k, type(getattr(self, k))(v)) + + +def dbdict_run_svm_experiment(state, channel=lambda *args, **kwargs:None): + """Parameters are described in state, and returned in state. + + :param state: object instance to store parameters and return values + :param channel: not used + + :returns: None + + This is the kind of function that dbdict-run can use. + + """ + dataset = make_dataset(**state.dataset) + + + #libsvm needs stuff in int32 on a 32bit machine + #TODO: test this on a 64bit machine + # -> Both int32 and int64 (default) seem to be OK + train_y = numpy.asarray(dataset.train.y, dtype='int32') + valid_y = numpy.asarray(dataset.valid.y, dtype='int32') + test_y = numpy.asarray(dataset.test.y, dtype='int32') + problem = libsvm.svm_problem(train_y, dataset.train.x); + + gamma0 = 0.5 / numpy.sum(numpy.var(dataset.train.x, axis=0)) + + param = libsvm.svm_parameter(C=state['C'], + kernel_type=getattr(libsvm, state['kernel']), + gamma=state['rel_gamma'] * gamma0) + + model = libsvm.svm_model(problem, param) #this is the expensive part + + state['train_01'] = score_01(dataset.train.x, train_y, model) + state['valid_01'] = score_01(dataset.valid.x, valid_y, model) + state['test_01'] = score_01(dataset.test.x, test_y, model) + + state['n_train'] = len(train_y) + state['n_valid'] = len(valid_y) + state['n_test'] = len(test_y) + +def run_svm_experiment(**kwargs): + """Python-friendly interface to dbdict_run_svm_experiment + + Parameters are used to construct a `State` instance, which is returned after running + `dbdict_run_svm_experiment` on it. + + .. code-block:: python + results = run_svm_experiment(dataset='MNIST_1k', C=100.0, rel_gamma=0.01) + print results.n_train + # 1000 + print results.valid_01, results.test_01 + # 0.14, 0.10 #.. or something... + + """ + state_run_svm_experiment(state=kwargs) + return kwargs + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/amat.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/amat.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,138 @@ +"""load PLearn AMat files + + +An AMat file is an ascii format for dense matrices. + +The format is not precisely defined, so I'll describe here a single recipe for making a valid +file. + +.. code-block:: text + + #size: + #sizes: + number number number .... + number number number .... + + +Tabs and spaces are both valid delimiters. Newlines separate consecutive rows. + +""" + +import sys, numpy, array + +class AMat: + """DataSource to access a plearn amat file as a periodic unrandomized stream. + + Attributes: + + input -- all columns of input + target -- all columns of target + weight -- all columns of weight + extra -- all columns of extra + + all -- the entire data contents of the amat file + n_examples -- the number of training examples in the file + + AMat stands for Ascii Matri[x,ces] + + """ + + marker_size = '#size:' + marker_sizes = '#sizes:' + marker_col_names = '#:' + + def __init__(self, path, head=None, update_interval=0, ofile=sys.stdout): + + """Load the amat at into memory. 
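# A tiny end-to-end illustration of the header conventions described above; the file
# contents and temporary path are made up for the example.
import os, tempfile
from pylearn.io.amat import AMat

text = ('#size: 3 4\n'
        '#sizes: 3 1 0 0\n'
        '0 0 1 0\n'
        '0 1 0 1\n'
        '1 0 0 2\n')
path = os.path.join(tempfile.mkdtemp(), 'tiny.amat')
open(path, 'w').write(text)
m = AMat(path)
print m.all.shape      # (3, 4)
print m.input.shape    # (3, 3): the first 3 columns, as declared by '#sizes'
print m.target.shape   # (3, 1)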
+ + path - str: location of amat file + head - int: stop reading after this many data rows + update_interval - int: print '.' to ofile every lines + ofile - file: print status, msgs, etc. to this file + + """ + self.all = None + self.input = None + self.target = None + self.weight = None + self.extra = None + + self.header = False + self.header_size = None + self.header_rows = None + self.header_cols = None + self.header_sizes = None + self.header_col_names = [] + + data_started = False + data = array.array('d') + + f = open(path) + n_data_lines = 0 + len_float_line = None + + for i,line in enumerate(f): + if n_data_lines == head: + #we've read enough data, + # break even if there's more in the file + break + if len(line) == 0 or line == '\n': + continue + if line[0] == '#': + if not data_started: + #the condition means that the file has a header, and we're on + # some header line + self.header = True + if line.startswith(AMat.marker_size): + info = line[len(AMat.marker_size):] + self.header_size = [int(s) for s in info.split()] + self.header_rows, self.header_cols = self.header_size + if line.startswith(AMat.marker_col_names): + info = line[len(AMat.marker_col_names):] + self.header_col_names = info.split() + elif line.startswith(AMat.marker_sizes): + info = line[len(AMat.marker_sizes):] + self.header_sizes = [int(s) for s in info.split()] + else: + #the first non-commented line tells us that the header is done + data_started = True + float_line = [float(s) for s in line.split()] + if len_float_line is None: + len_float_line = len(float_line) + if (self.header_cols is not None) \ + and self.header_cols != len_float_line: + print >> sys.stderr, \ + 'WARNING: header declared %i cols but first line has %i, using %i',\ + self.header_cols, len_float_line, len_float_line + else: + if len_float_line != len(float_line): + raise IOError('wrong line length', i, line) + data.extend(float_line) + n_data_lines += 1 + + if update_interval > 0 and (ofile is not None) \ + and n_data_lines % update_interval == 0: + ofile.write('.') + ofile.flush() + + if update_interval > 0: + ofile.write('\n') + f.close() + + # convert from array.array to numpy.ndarray + nshape = (len(data) / len_float_line, len_float_line) + self.all = numpy.frombuffer(data).reshape(nshape) + self.n_examples = self.all.shape[0] + + # assign + if self.header_sizes is not None: + if len(self.header_sizes) > 4: + print >> sys.stderr, 'WARNING: ignoring sizes after 4th in %s' % path + leftmost = 0 + #here we make use of the fact that if header_sizes has len < 4 + # the loop will exit before 4 iterations + attrlist = ['input', 'target', 'weight', 'extra'] + for attr, ncols in zip(attrlist, self.header_sizes): + setattr(self, attr, self.all[:, leftmost:leftmost+ncols]) + leftmost += ncols + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/filetensor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/filetensor.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,141 @@ +""" +Read and write the matrix file format described at +U{http://www.cs.nyu.edu/~ylclab/data/norb-v1.0/index.html} + +The format is for dense tensors: + + - magic number indicating type and endianness - 4bytes + - rank of tensor - int32 + - dimensions - int32, int32, int32, ... + - + +The number of dimensions and rank is slightly tricky: + - for scalar: rank=0, dimensions = [1, 1, 1] + - for vector: rank=1, dimensions = [?, 1, 1] + - for matrix: rank=2, dimensions = [?, ?, 1] + +For rank >= 3, the number of dimensions matches the rank exactly. 
+ + +@todo: add complex type support + +""" +import sys +import numpy + +def _prod(lst): + p = 1 + for l in lst: + p *= l + return p + +_magic_dtype = { + 0x1E3D4C51 : ('float32', 4), + #0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix? + 0x1E3D4C53 : ('float64', 8), + 0x1E3D4C54 : ('int32', 4), + 0x1E3D4C55 : ('uint8', 1), + 0x1E3D4C56 : ('int16', 2), + } +_dtype_magic = { + 'float32': 0x1E3D4C51, + #'packed matrix': 0x1E3D4C52, + 'float64': 0x1E3D4C53, + 'int32': 0x1E3D4C54, + 'uint8': 0x1E3D4C55, + 'int16': 0x1E3D4C56 + } + +# +# TODO: implement item selection: +# e.g. load('some mat', subtensor=(:6, 2:5)) +# +# This function should be memory efficient by: +# - allocating an output matrix at the beginning +# - seeking through the file, reading subtensors from multiple places +def read(f, subtensor=None, debug=False): + """Load all or part of file 'f' into a numpy ndarray + + @param f: file from which to read + @type f: file-like object + + If subtensor is not None, it should be like the argument to + numpy.ndarray.__getitem__. The following two expressions should return + equivalent ndarray objects, but the one on the left may be faster and more + memory efficient if the underlying file f is big. + + read(f, subtensor) <===> read(f)[*subtensor] + + Support for subtensors is currently spotty, so check the code to see if your + particular type of subtensor is supported. + + """ + def _read_int32(f): + s = f.read(4) + s_array = numpy.fromstring(s, dtype='int32') + return s_array.item() + + #what is the data type of this matrix? + #magic_s = f.read(4) + #magic = numpy.fromstring(magic_s, dtype='int32') + magic = _read_int32(f) + magic_t, elsize = _magic_dtype[magic] + if debug: + print 'header magic', magic, magic_t, elsize + if magic_t == 'packed matrix': + raise NotImplementedError('packed matrix not supported') + + #what is the rank of the tensor? + ndim = _read_int32(f) + if debug: print 'header ndim', ndim + + #what are the dimensions of the tensor? + dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim] + dim_size = _prod(dim) + if debug: print 'header dim', dim, dim_size + + rval = None + if subtensor is None: + rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim) + elif isinstance(subtensor, slice): + if subtensor.step not in (None, 1): + raise NotImplementedError('slice with step', subtensor.step) + if subtensor.start not in (None, 0): + bytes_per_row = _prod(dim[1:]) * elsize + raise NotImplementedError('slice with start', subtensor.start) + dim[0] = min(dim[0], subtensor.stop) + rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim) + else: + raise NotImplementedError('subtensor access not written yet:', subtensor) + + return rval + +def write(f, mat): + """Write a numpy.ndarray to file. 
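# A write/read round trip through the format described above; a sketch using a temporary
# file, with arbitrary array contents.
import os, tempfile
import numpy
from pylearn.io import filetensor

mat = numpy.arange(12, dtype='float64').reshape(3, 4)
path = os.path.join(tempfile.mkdtemp(), 'tiny.ft')
f = open(path, 'wb')
filetensor.write(f, mat)     # magic for float64, rank 2, dims (3, 4, 1), then raw data
f.close()
mat2 = filetensor.read(open(path, 'rb'))
assert mat2.shape == (3, 4) and (mat2 == mat).all()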
+ + @param f: file into which to write + @type f: file-like object + + @param mat: array to write to file + @type mat: numpy ndarray or compatible + + """ + def _write_int32(f, i): + i_array = numpy.asarray(i, dtype='int32') + if 0: print 'writing int32', i, i_array + i_array.tofile(f) + + try: + _write_int32(f, _dtype_magic[str(mat.dtype)]) + except KeyError: + raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype) + + _write_int32(f, len(mat.shape)) + shape = mat.shape + if len(shape) < 3: + shape = list(shape) + [1] * (3 - len(shape)) + if 0: print 'writing shape =', shape + for sh in shape: + _write_int32(f, sh) + mat.tofile(f) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/image_tiling.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/image_tiling.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,84 @@ +""" +Illustrate filters (or data) in a grid of small image-shaped tiles. +""" + +import numpy +from PIL import Image + +def scale_to_unit_interval(ndar): + ndar = ndar.copy() + ndar -= ndar.min() + ndar *= 1.0 / ndar.max() + return ndar + +def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0,0), + scale_rows_to_unit_interval=True, + output_pixel_vals=True + ): + """ + Transform an array with one flattened image per row, into an array in which images are + reshaped and layed out like tiles on a floor. + + This function is useful for visualizing datasets whose rows are images, and also columns of + matrices for transforming those rows (such as the first layer of a neural net). + + :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can be 2-D ndarrays or None + :param X: a 2-D array in which every row is a flattened image. + :type img_shape: tuple; (height, width) + :param img_shape: the original shape of each image + :type tile_shape: tuple; (rows, cols) + :param tile_shape: the number of images to tile (rows, cols) + + :returns: array suitable for viewing as an image. (See:`PIL.Image.fromarray`.) + :rtype: a 2-d array with same dtype as X. + + """ + assert len(img_shape) == 2 + assert len(tile_shape) == 2 + assert len(tile_spacing) == 2 + + out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp + in zip(img_shape, tile_shape, tile_spacing)] + + if isinstance(X, tuple): + assert len(X) == 4 + if output_pixel_vals: + out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype='uint8') + else: + out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=X.dtype) + + #colors default to 0, alpha defaults to 1 (opaque) + if output_pixel_vals: + channel_defaults = [0,0,0,255] + else: + channel_defaults = [0.,0.,0.,1.] 
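# Example use of tile_raster_images for a plain 2-D array of flattened images; a sketch
# assuming PIL is installed, with random placeholder "filters".
import numpy
from PIL import Image
from pylearn.io.image_tiling import tile_raster_images

X = numpy.random.rand(25, 64)                       # 25 flattened 8x8 images
tiles = tile_raster_images(X, img_shape=(8, 8),
                           tile_shape=(5, 5), tile_spacing=(1, 1))
Image.fromarray(tiles).save('tiles.png')            # uint8 output, viewable directly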
+ + for i in xrange(4): + if X[i] is None: + out_array[:,:,i] = numpy.zeros(out_shape, + dtype='uint8' if output_pixel_vals else out_array.dtype + )+channel_defaults[i] + else: + out_array[:,:,i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals) + return out_array + + else: + H, W = img_shape + Hs, Ws = tile_spacing + + out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype) + for tile_row in xrange(tile_shape[0]): + for tile_col in xrange(tile_shape[1]): + if tile_row * tile_shape[1] + tile_col < X.shape[0]: + if scale_rows_to_unit_interval: + this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)) + else: + this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape) + out_array[ + tile_row * (H+Hs):tile_row*(H+Hs)+H, + tile_col * (W+Ws):tile_col*(W+Ws)+W + ] \ + = this_img * (255 if output_pixel_vals else 1) + return out_array + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/pmat.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/pmat.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,526 @@ +## Automatically adapted for numpy.numarray Jun 13, 2007 by python_numarray_to_numpy (-xsm) + +# PMat.py +# Copyright (C) 2005 Pascal Vincent +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. The name of the authors may not be used to endorse or promote +# products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +# NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# This file is part of the PLearn library. 
For more information on the PLearn +# library, go to the PLearn Web site at www.plearn.org + + +# Author: Pascal Vincent + +#import numarray, sys, os, os.path +import numpy.numarray, sys, os, os.path +import fpconst + +def array_columns( a, cols ): + indices = None + if isinstance( cols, int ): + indices = [ cols ] + elif isinstance( cols, slice ): + #print cols + indices = range( *cols.indices(cols.stop) ) + else: + indices = list( cols ) + + return numpy.numarray.take(a, indices, axis=1) + +def load_pmat_as_array(fname): + s = file(fname,'rb').read() + formatstr = s[0:64] + datastr = s[64:] + structuretype, l, w, data_type, endianness = formatstr.split() + + if data_type=='DOUBLE': + elemtype = 'd' + elif data_type=='FLOAT': + elemtype = 'f' + else: + raise ValueError('Invalid data type in file header: '+data_type) + + if endianness=='LITTLE_ENDIAN': + byteorder = 'little' + elif endianness=='BIG_ENDIAN': + byteorder = 'big' + else: + raise ValueError('Invalid endianness in file header: '+endianness) + + l = int(l) + w = int(w) + X = numpy.numarray.fromstring(datastr,elemtype, shape=(l,w) ) + if byteorder!=sys.byteorder: + X.byteswap(True) + return X + +def load_pmat_as_array_dataset(fname): + import dataset,lookup_list + + #load the pmat as array + a=load_pmat_as_array(fname) + + #load the fieldnames + fieldnames = [] + fieldnamefile = os.path.join(fname+'.metadata','fieldnames') + if os.path.isfile(fieldnamefile): + f = open(fieldnamefile) + for row in f: + row = row.split() + if len(row)>0: + fieldnames.append(row[0]) + f.close() + else: + self.fieldnames = [ "field_"+str(i) for i in range(a.shape[1]) ] + + return dataset.ArrayDataSet(a,lookup_list.LookupList(fieldnames,[x for x in range(a.shape[1])])) + +def load_amat_as_array_dataset(fname): + import dataset,lookup_list + + #load the amat as array + (a,fieldnames)=readAMat(fname) + + #load the fieldnames + if len(fieldnames)==0: + self.fieldnames = [ "field_"+str(i) for i in range(a.shape[1]) ] + + return dataset.ArrayDataSet(a,lookup_list.LookupList(fieldnames,[x for x in range(a.shape[1])])) + +def save_array_dataset_as_pmat(fname,ds): + ar=ds.data + save_array_as_pmat(fname,ar,ds.fieldNames()) + +def save_array_as_pmat( fname, ar, fieldnames=[] ): + s = file(fname,'wb') + + length, width = ar.shape + if fieldnames: + assert len(fieldnames) == width + metadatadir = fname+'.metadata' + if not os.path.isdir(metadatadir): + os.mkdir(metadatadir) + fieldnamefile = os.path.join(metadatadir,'fieldnames') + f = open(fieldnamefile,'wb') + for name in fieldnames: + f.write(name+'\t0\n') + f.close() + + header = 'MATRIX ' + str(length) + ' ' + str(width) + ' ' + if ar.dtype.char=='d': + header += 'DOUBLE ' + elemsize = 8 + + elif ar.dtype.char=='f': + header += 'FLOAT ' + elemsize = 4 + + else: + raise TypeError('Unsupported typecode: %s' % ar.dtype.char) + + rowsize = elemsize*width + + if sys.byteorder=='little': + header += 'LITTLE_ENDIAN ' + elif sys.byteorder=='big': + header += 'BIG_ENDIAN ' + else: + raise TypeError('Unsupported sys.byteorder: '+repr(sys.byteorder)) + + header += ' '*(63-len(header))+'\n' + s.write( header ) + s.write( ar.tostring() ) + s.close() + + +####### Iterators ########################################################### + +class VMatIt: + def __init__(self, vmat): + self.vmat = vmat + self.cur_row = 0 + + def __iter__(self): + return self + + def next(self): + if self.cur_row==self.vmat.length: + raise StopIteration + row = self.vmat.getRow(self.cur_row) + self.cur_row += 1 + return row + +class ColumnIt: + 
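+    # Iterates over the values of a single column ('col') of a VMat, one row
+    # at a time, in the same way VMatIt above iterates over whole rows.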
def __init__(self, vmat, col): + self.vmat = vmat + self.col = col + self.cur_row = 0 + + def __iter__(self): + return self + + def next(self): + if self.cur_row==self.vmat.length: + raise StopIteration + val = self.vmat[self.cur_row, self.col] + self.cur_row += 1 + return val + +####### VMat classes ######################################################## + +class VMat: + def __iter__(self): + return VMatIt(self) + + def __getitem__( self, key ): + if isinstance( key, slice ): + start, stop, step = key.start, key.stop, key.step + if step!=None: + raise IndexError('Extended slice with step not currently supported') + + if start is None: + start = 0 + + l = self.length + if stop is None or stop > l: + stop = l + + return self.getRows(start,stop-start) + + elif isinstance( key, tuple ): + # Basically returns a SubVMatrix + assert len(key) == 2 + rows = self.__getitem__( key[0] ) + + shape = rows.shape + if len(shape) == 1: + return rows[ key[1] ] + + cols = key[1] + if isinstance(cols, slice): + start, stop, step = cols.start, cols.stop, cols.step + if start is None: + start = 0 + + if stop is None: + stop = self.width + elif stop < 0: + stop = self.width+stop + + cols = slice(start, stop, step) + + return array_columns(rows, cols) + + elif isinstance( key, str ): + # The key is considered to be a fieldname and a column is + # returned. + try: + return array_columns( self.getRows(0,self.length), + self.fieldnames.index(key) ) + except ValueError: + print >>sys.stderr, "Key is '%s' while fieldnames are:" % key + print >>sys.stderr, self.fieldnames + raise + + else: + if key<0: key+=self.length + return self.getRow(key) + + def getFieldIndex(self, fieldname): + try: + return self.fieldnames.index(fieldname) + except ValueError: + raise ValueError( "VMat has no field named %s. 
Field names: %s" + %(fieldname, ','.join(self.fieldnames)) ) + +class PMat( VMat ): + + def __init__(self, fname, openmode='r', fieldnames=[], elemtype='d', + inputsize=-1, targetsize=-1, weightsize=-1, array = None): + self.fname = fname + self.inputsize = inputsize + self.targetsize = targetsize + self.weightsize = weightsize + if openmode=='r': + self.f = open(fname,'rb') + self.read_and_parse_header() + self.load_fieldnames() + + elif openmode=='w': + self.f = open(fname,'w+b') + self.fieldnames = fieldnames + self.save_fieldnames() + self.length = 0 + self.width = len(fieldnames) + self.elemtype = elemtype + self.swap_bytes = False + self.write_header() + + elif openmode=='a': + self.f = open(fname,'r+b') + self.read_and_parse_header() + self.load_fieldnames() + + else: + raise ValueError("Currently only supported openmodes are 'r', 'w' and 'a': "+repr(openmode)+" is not supported") + + if array is not None: + shape = array.shape + if len(shape) == 1: + row_format = lambda r: [ r ] + elif len(shape) == 2: + row_format = lambda r: r + + for row in array: + self.appendRow( row_format(row) ) + + def __del__(self): + self.close() + + def write_header(self): + header = 'MATRIX ' + str(self.length) + ' ' + str(self.width) + ' ' + + if self.elemtype=='d': + header += 'DOUBLE ' + self.elemsize = 8 + elif self.elemtype=='f': + header += 'FLOAT ' + self.elemsize = 4 + else: + raise TypeError('Unsupported elemtype: '+repr(elemtype)) + self.rowsize = self.elemsize*self.width + + if sys.byteorder=='little': + header += 'LITTLE_ENDIAN ' + elif sys.byteorder=='big': + header += 'BIG_ENDIAN ' + else: + raise TypeError('Unsupported sys.byteorder: '+repr(sys.byteorder)) + + header += ' '*(63-len(header))+'\n' + + self.f.seek(0) + self.f.write(header) + + def read_and_parse_header(self): + header = self.f.read(64) + mat_type, l, w, data_type, endianness = header.split() + if mat_type!='MATRIX': + raise ValueError('Invalid file header (should start with MATRIX)') + self.length = int(l) + self.width = int(w) + if endianness=='LITTLE_ENDIAN': + byteorder = 'little' + elif endianness=='BIG_ENDIAN': + byteorder = 'big' + else: + raise ValueError('Invalid endianness in file header: '+endianness) + self.swap_bytes = (byteorder!=sys.byteorder) + + if data_type=='DOUBLE': + self.elemtype = 'd' + self.elemsize = 8 + elif data_type=='FLOAT': + self.elemtype = 'f' + self.elemsize = 4 + else: + raise ValueError('Invalid data type in file header: '+data_type) + self.rowsize = self.elemsize*self.width + + def load_fieldnames(self): + self.fieldnames = [] + fieldnamefile = os.path.join(self.fname+'.metadata','fieldnames') + if os.path.isfile(fieldnamefile): + f = open(fieldnamefile) + for row in f: + row = row.split() + if len(row)>0: + self.fieldnames.append(row[0]) + f.close() + else: + self.fieldnames = [ "field_"+str(i) for i in range(self.width) ] + + def save_fieldnames(self): + metadatadir = self.fname+'.metadata' + if not os.path.isdir(metadatadir): + os.mkdir(metadatadir) + fieldnamefile = os.path.join(metadatadir,'fieldnames') + f = open(fieldnamefile,'wb') + for name in self.fieldnames: + f.write(name+'\t0\n') + f.close() + + def getRow(self,i): + if i<0 or i>=self.length: + raise IndexError('PMat index out of range') + self.f.seek(64+i*self.rowsize) + data = self.f.read(self.rowsize) + ar = numpy.numarray.fromstring(data, self.elemtype, (self.width,)) + if self.swap_bytes: + ar.byteswap(True) + return ar + + def getRows(self,i,l): + if i<0 or l<0 or i+l>self.length: + raise IndexError('PMat index out of 
range') + self.f.seek(64+i*self.rowsize) + data = self.f.read(l*self.rowsize) + ar = numpy.numarray.fromstring(data, self.elemtype, (l,self.width)) + if self.swap_bytes: + ar.byteswap(True) + return ar + + def checkzerorow(self,i): + if i<0 or i>self.length: + raise IndexError('PMat index out of range') + self.f.seek(64+i*self.rowsize) + data = self.f.read(self.rowsize) + ar = numpy.numarray.fromstring(data, self.elemtype, (len(data)/self.elemsize,)) + if self.swap_bytes: + ar.byteswap(True) + for elem in ar: + if elem!=0: + return False + return True + + def putRow(self,i,row): + if i<0 or i>=self.length: + raise IndexError('PMat index out of range') + if len(row)!=self.width: + raise TypeError('length of row ('+str(len(row))+ ') differs from matrix width ('+str(self.width)+')') + if i<0 or i>=self.length: + raise IndexError + if self.swap_bytes: # must make a copy and swap bytes + ar = numpy.numarray.numarray(row,type=self.elemtype) + ar.byteswap(True) + else: # asarray makes a copy if not already a numarray of the right type + ar = numpy.numarray.asarray(row,type=self.elemtype) + self.f.seek(64+i*self.rowsize) + self.f.write(ar.tostring()) + + def appendRow(self,row): + if len(row)!=self.width: + raise TypeError('length of row ('+str(len(row))+ ') differs from matrix width ('+str(self.width)+')') + if self.swap_bytes: # must make a copy and swap bytes + ar = numpy.numarray.numarray(row,type=self.elemtype) + ar.byteswap(True) + else: # asarray makes a copy if not already a numarray of the right type + ar = numpy.numarray.asarray(row,type=self.elemtype) + + self.f.seek(64+self.length*self.rowsize) + self.f.write(ar.tostring()) + self.length += 1 + self.write_header() # update length in header + + def flush(self): + self.f.flush() + + def close(self): + if hasattr(self, 'f'): + self.f.close() + + def append(self,row): + self.appendRow(row) + + def __setitem__(self, i, row): + l = self.length + if i<0: i+=l + self.putRow(i,row) + + def __len__(self): + return self.length + + + +#copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py +def safefloat(str): + """Convert the given string to its float value. It is 'safe' in the sense + that missing values ('nan') will be properly converted to the corresponding + float value under all platforms, contrarily to 'float(str)'. + """ + if str.lower() == 'nan': + return fpconst.NaN + else: + return float(str) + +#copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py +def readAMat(amatname): + """Read a PLearn .amat file and return it as a numarray Array. + + Return a tuple, with as the first argument the array itself, and as + the second argument the fieldnames (list of strings). + """ + ### NOTE: this version is much faster than first creating the array and + ### updating each row as it is read... Bizarrely enough + f = open(amatname) + a = [] + fieldnames = [] + for line in f: + if line.startswith("#size:"): + (length,width) = line[6:].strip().split() + elif line.startswith("#sizes:"): # ignore input/target/weight/extra sizes + continue + + elif line.startswith("#:"): + fieldnames = line[2:].strip().split() + pass + elif not line.startswith('#'): + # Add all non-comment lines. 
+ row = [ safefloat(x) for x in line.strip().split() ] + if row: + a.append(row) + + f.close() + return numpy.numarray.array(a), fieldnames + + +if __name__ == '__main__': + pmat = PMat( 'tmp.pmat', 'w', fieldnames=['F1', 'F2'] ) + pmat.append( [1, 2] ) + pmat.append( [3, 4] ) + pmat.close() + + pmat = PMat( 'tmp.pmat', 'r' ) + ar=load_pmat_as_array('tmp.pmat') + ds=load_pmat_as_array_dataset('tmp.pmat') + + print "PMat",pmat + print "PMat",pmat[:] + print "array",ar + print "ArrayDataSet",ds + for i in ds: + print i + save_array_dataset_as_pmat("tmp2.pmat",ds) + ds2=load_pmat_as_array_dataset('tmp2.pmat') + for i in ds2: + print i + # print "+++ tmp.pmat contains: " + # os.system( 'plearn vmat cat tmp.pmat' ) + import shutil + for fname in ["tmp.pmat", "tmp2.pmat"]: + os.remove( fname ) + if os.path.exists( fname+'.metadata' ): + shutil.rmtree( fname+'.metadata' ) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/tests/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/tests/__init__.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,2 @@ + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/tests/test_filetensor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/tests/test_filetensor.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,121 @@ + +from pylearn.io import filetensor +import numpy + +import unittest +import os + +class T(unittest.TestCase): + fname = '/tmp/some_mat' + + def setUp(self): + #TODO: test that /tmp/some_mat does not exist + try: + os.stat(self.fname) + except OSError: + return #assume file was not found + raise Exception('autotest file "%s" exists!' % self.fname) + + def tearDown(self): + os.remove(self.fname) + + def test_file(self): + gen = numpy.random.rand(1) + f = file(self.fname, 'w'); + filetensor.write(f, gen) + f.flush() + f = file(self.fname, 'r'); + mat = filetensor.read(f, None, debug=False) #load from filename + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def test_filename(self): + gen = numpy.random.rand(1) + f = file(self.fname, 'w') + filetensor.write(f, gen) + f.close() + f = file(self.fname, 'r') + mat = filetensor.read(f, None, debug=False) #load from filename + f.close() + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def testNd(self): + """shape and values are stored correctly for tensors of rank 0 to 5""" + whole_shape = [5, 6, 7, 8, 9] + for i in xrange(5): + gen = numpy.asarray(numpy.random.rand(*whole_shape[:i])) + f = file(self.fname, 'w'); + filetensor.write(f, gen) + f.flush() + f = file(self.fname, 'r'); + mat = filetensor.read(f, None, debug=False) #load from filename + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def test_dtypes(self): + """shape and values are stored correctly for all dtypes """ + for dtype in filetensor._dtype_magic: + gen = numpy.asarray( + numpy.random.rand(4, 5, 2, 1) * 100, + dtype=dtype) + f = file(self.fname, 'w'); + filetensor.write(f, gen) + f.flush() + f = file(self.fname, 'r'); + mat = filetensor.read(f, None, debug=False) #load from filename + self.failUnless(gen.dtype == mat.dtype) + self.failUnless(gen.shape == mat.shape) + self.failUnless(numpy.all(gen == mat)) + + def test_dtype_invalid(self): + gen = numpy.zeros((3,4), dtype='uint16') #an unsupported dtype + f = file(self.fname, 'w') + passed = False + try: + filetensor.write(f, gen) + except TypeError, e: + if e[0].startswith('Invalid ndarray dtype'): + passed = True + f.close() + self.failUnless(passed) + + +if 
__name__ == '__main__': + unittest.main() + + #a small test script, starts by reading sys.argv[1] + #print 'rval', rval.shape, rval.size + + if 0: + filetensor.write(f, rval) + print '' + f.close() + f = file('/tmp/some_mat', 'r'); + rval2 = filetensor.read(f) #load from file handle + print 'rval2', rval2.shape, rval2.size + + assert rval.dtype == rval2.dtype + assert rval.shape == rval2.shape + assert numpy.all(rval == rval2) + print 'ok' + + def _unused(): + f.seek(0,2) #seek to end + f_len = f.tell() + f.seek(f_data_start,0) #seek back to where we were + + if debug: print 'length:', f_len + + + f_data_bytes = (f_len - f_data_start) + + if debug: print 'data bytes according to header: ', dim_size * elsize + if debug: print 'data bytes according to file : ', f_data_bytes + + if debug: print 'reading data...' + sys.stdout.flush() + + def read_ndarray(f, dim, dtype): + return numpy.fromfile(f, dtype=dtype, count=_prod(dim)).reshape(dim) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/io/wavread.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/wavread.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,57 @@ +"""`WavRead` Op""" +__docformat__ = "restructuredtext en" + +import numpy +import theano +import wave + +class WavRead(theano.Op): + #TODO: add the samplerate as an output + """Read a wave file + + input - the path to a wave file + output - the contents of the wave file in pcm format, and the samplerate + + """ + + out_type = None + """The type for the output of this op. + + Currently only wvector (aka int16) and dvector (aka double) are supported + """ + + def __init__(self, out_type): + self.out_type = out_type + if out_type not in [theano.tensor.dvector, theano.tensor.wvector]: + raise TypeError(out_type) + def __eq__(self, other): + return (type(self) == type(other)) and (self.out_type == other.out_type) + def __hash__(self): + return hash(type(self)) ^ hash(self.out_type) + def make_node(self, path): + return theano.Apply(self, [path], [self.out_type(), theano.tensor.dscalar()]) + def perform(self, node, (path,), (out, sr)): + w = wave.open(path) + + if w.getnchannels() != 1: + raise NotImplementedError() + if w.getsampwidth() != 2: #2 bytes means 16bit samples + raise NotImplementedError() + + samples = numpy.frombuffer(w.readframes(w.getnframes()), dtype='int16') + + if self.out_type == theano.tensor.wvector: + out[0] = samples + elif self.out_type == theano.tensor.dvector: + out[0] = samples * (1.0 / 2**15) + else: + raise NotImplementedError() + + sr[0] = w.getframerate() + + def grad(self, inputs, g_output): + return [None for i in inputs] + +wav_read_int16 = WavRead(theano.tensor.wvector) +wav_read_double = WavRead(theano.tensor.dvector) + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/old_dataset/_test_dataset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/old_dataset/_test_dataset.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,680 @@ +#!/bin/env python +from dataset import * +from math import * +import numpy, unittest, sys +#from misc import * +from lookup_list import LookupList + +def have_raised(to_eval, **var): + have_thrown = False + try: + eval(to_eval) + except : + have_thrown = True + return have_thrown + +def have_raised2(f, *args, **kwargs): + have_thrown = False + try: + f(*args, **kwargs) + except : + have_thrown = True + return have_thrown + +def test1(): + print "test1" + global a,ds + a = numpy.random.rand(10,4) + print a + ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]}) + print "len(ds)=",len(ds) + assert(len(ds)==10) + print "example 0 
= ",ds[0] +# assert + print "x=",ds["x"] + print "x|y" + for x,y in ds("x","y"): + print x,y + minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4) + minibatch = minibatch_iterator.__iter__().next() + print "minibatch=",minibatch + for var in minibatch: + print "var=",var + print "take a slice and look at field y",ds[1:6:2]["y"] + + del a,ds,x,y,minibatch_iterator,minibatch,var + +def test_iterate_over_examples(array,ds): +#not in doc!!! + i=0 + for example in range(len(ds)): + wanted = array[example][:3] + returned = ds[example]['x'] + if (wanted != returned).all(): + print 'returned:', returned + print 'wanted:', wanted + assert (ds[example]['x']==array[example][:3]).all() + assert ds[example]['y']==array[example][3] + assert (ds[example]['z']==array[example][[0,2]]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for example in dataset: + i=0 + for example in ds: + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for val1,val2,... in dataset: + i=0 + for x,y,z in ds: + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + +# - for example in dataset(field1, field2,field3, ...): + i=0 + for example in ds('x','y','z'): + assert len(example)==3 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (example['z']==array[i][0:3:2]).all() + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + i=0 + for example in ds('y','x'): + assert len(example)==2 + assert (example['x']==array[i][:3]).all() + assert example['y']==array[i][3] + assert (numpy.append(example['x'],example['y'])==array[i]).all() + i+=1 + assert i==len(ds) + del example,i + +# - for val1,val2,val3 in dataset(field1, field2,field3): + i=0 + for x,y,z in ds('x','y','z'): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (z==array[i][0:3:2]).all() + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,z,i + i=0 + for y,x in ds('y','x',): + assert (x==array[i][:3]).all() + assert y==array[i][3] + assert (numpy.append(x,y)==array[i]).all() + i+=1 + assert i==len(ds) + del x,y,i + + def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished): + ##full minibatch or the last minibatch + for idx in range(nb_field): + test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished) + del idx + def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished): + assert len(minibatch_field)==minibatch_size or ((nb_iter_finished*minibatch_size+len(minibatch_field))==len_ds and len(minibatch_field)2: + ds[:1] + ds[1:1] + ds[1:1:1] + if len(ds)>5: + ds[[1,2,3]] + for x in ds: + pass + +#ds[:n] returns a LookupList with the n first examples. + ds2=ds[:3] + test_ds(ds,ds2,index=[0,1,2]) + del ds2 + +#ds[i:j] returns a LookupList with examples i,i+1,...,j-1. + ds2=ds[1:3] + test_ds(ds,ds2,index=[1,2]) + del ds2 + +#ds[i1:i2:s] returns a LookupList with the examples i1,i1+s,...i2-s. + ds2=ds[1:7:2] + test_ds(ds,ds2,[1,3,5]) + del ds2 + +#ds[i] returns the (i+1)-th example of the dataset. 
+ ds2=ds[5] + assert isinstance(ds2,Example) + test_ds(ds,ds2,[5]) + assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined + assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) + del ds2 + +#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. + ds2=ds[[4,7,2,8]] +# assert isinstance(ds2,DataSet) + test_ds(ds,ds2,[4,7,2,8]) + del ds2 + + #ds.# returns the value of a property associated with + #the name . The following properties should be supported: + # - 'description': a textual description or name for the ds + # - 'fieldtypes': a list of types (one per field) + + #* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? + #assert hstack([ds('x','y'),ds('z')])==ds + #hstack([ds('z','y'),ds('x')])==ds + assert have_raised2(hstack,[ds('x'),ds('x')]) + assert have_raised2(hstack,[ds('y','x'),ds('x')]) + assert not have_raised2(hstack,[ds('x'),ds('y')]) + + # i=0 + # for example in hstack([ds('x'),ds('y'),ds('z')]): + # example==ds[i] + # i+=1 + # del i,example + #* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? + +def test_subset(array,ds): + def test_ds(orig,ds,index): + i=0 + assert isinstance(ds2,DataSet) + assert len(ds)==len(index) + for x,z,y in ds('x','z','y'): + assert (orig[index[i]]['x']==array[index[i]][:3]).all() + assert (orig[index[i]]['x']==x).all() + assert orig[index[i]]['y']==array[index[i]][3] + assert orig[index[i]]['y']==y + assert (orig[index[i]]['z']==array[index[i]][0:3:2]).all() + assert (orig[index[i]]['z']==z).all() + i+=1 + del i + ds[0] + if len(ds)>2: + ds[:1] + ds[1:1] + ds[1:1:1] + if len(ds)>5: + ds[[1,2,3]] + for x in ds: + pass + +#ds[:n] returns a dataset with the n first examples. + ds2=ds.subset[:3] + test_ds(ds,ds2,index=[0,1,2]) +# del ds2 + +#ds[i1:i2:s]# returns a ds with the examples i1,i1+s,...i2-s. + ds2=ds.subset[1:7:2] + test_ds(ds,ds2,[1,3,5]) +# del ds2 + +# #ds[i] +# ds2=ds.subset[5] +# assert isinstance(ds2,Example) +# assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined +# assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) +# del ds2 + +#ds[[i1,i2,...in]]# returns a ds with examples i1,i2,...in. + ds2=ds.subset[[4,7,2,8]] + test_ds(ds,ds2,[4,7,2,8]) +# del ds2 + +#ds.# returns the value of a property associated with + #the name . The following properties should be supported: + # - 'description': a textual description or name for the ds + # - 'fieldtypes': a list of types (one per field) + +#* ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])#???? + #assert hstack([ds('x','y'),ds('z')])==ds + #hstack([ds('z','y'),ds('x')])==ds + assert have_raised2(hstack,[ds('x'),ds('x')]) + assert have_raised2(hstack,[ds('y','x'),ds('x')]) + assert not have_raised2(hstack,[ds('x'),ds('y')]) + +# i=0 +# for example in hstack([ds('x'),ds('y'),ds('z')]): +# example==ds[i] +# i+=1 +# del i,example +#* ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])#???? 
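+
+# A minimal indexing sketch, kept separate from the test functions above.  It
+# assumes the same ArrayDataSet interface exercised by test_getitem and
+# test_subset; the helper name and the literal indices are illustrative only.
+def _indexing_sketch():
+    a = numpy.random.rand(10,4)
+    ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]})
+    assert len(ds)==10
+    # ds[i] -> a single Example whose fields come from row i
+    assert (ds[5]['x']==a[5][:3]).all()
+    assert ds[5]['y']==a[5][3]
+    # ds[i1:i2:s] -> a LookupList over examples i1, i1+s, ... (here 1, 3, 5)
+    some = ds[1:7:2]
+    # ds.subset[[i1,...,in]] -> a DataSet restricted to those examples
+    sub = ds.subset[[4,7,2,8]]
+    assert len(sub)==4
+    del a,ds,some,sub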
+ +def test_fields_fct(ds): + #@todo, fill correctly + assert len(ds.fields())==3 + i=0 + v=0 + for field in ds.fields(): + for field_value in field: # iterate over the values associated to that field for all the ds examples + v+=1 + i+=1 + assert i==3 + assert v==3*10 + del i,v + + i=0 + v=0 + for field in ds('x','z').fields(): + i+=1 + for val in field: + v+=1 + assert i==2 + assert v==2*10 + del i,v + + i=0 + v=0 + for field in ds.fields('x','y'): + i+=1 + for val in field: + v+=1 + assert i==2 + assert v==2*10 + del i,v + + i=0 + v=0 + for field_examples in ds.fields(): + for example_value in field_examples: + v+=1 + i+=1 + assert i==3 + assert v==3*10 + del i,v + + assert ds == ds.fields().examples() + assert len(ds('x','y').fields()) == 2 + assert len(ds('x','z').fields()) == 2 + assert len(ds('y').fields()) == 1 + + del field + +def test_overrides(ds) : + """ Test for examples that an override __getitem__ acts as the one in DataSet """ + def ndarray_list_equal(nda,l) : + """ + Compares if a ndarray is the same as the list. Do it by converting the list into + an numpy.ndarray, if possible + """ + try : + l = numpy.asmatrix(l) + except : + return False + return smart_equal(nda,l) + + def smart_equal(a1,a2) : + """ + Handles numpy.ndarray, LookupList, and basic containers + """ + if not isinstance(a1,type(a2)) and not isinstance(a2,type(a1)): + #special case: matrix vs list of arrays + if isinstance(a1,numpy.ndarray) : + return ndarray_list_equal(a1,a2) + elif isinstance(a2,numpy.ndarray) : + return ndarray_list_equal(a2,a1) + return False + # compares 2 numpy.ndarray + if isinstance(a1,numpy.ndarray): + if len(a1.shape) != len(a2.shape): + return False + for k in range(len(a1.shape)) : + if a1.shape[k] != a2.shape[k]: + return False + return (a1==a2).all() + # compares 2 lookuplists + if isinstance(a1,LookupList) : + if len(a1._names) != len(a2._names) : + return False + for k in a1._names : + if k not in a2._names : + return False + if not smart_equal(a1[k],a2[k]) : + return False + return True + # compares 2 basic containers + if hasattr(a1,'__len__'): + if len(a1) != len(a2) : + return False + for k in range(len(a1)) : + if not smart_equal(a1[k],a2[k]): + return False + return True + # try basic equals + return a1 is a2 + + def mask(ds) : + class TestOverride(type(ds)): + def __init__(self,ds) : + self.ds = ds + def __getitem__(self,key) : + res1 = self.ds[key] + res2 = DataSet.__getitem__(ds,key) + assert smart_equal(res1,res2) + return res1 + return TestOverride(ds) + # test getitem + ds2 = mask(ds) + for k in range(10): + res = ds2[k] + res = ds2[1:len(ds):3] + + + + + + +def test_all(array,ds): + assert len(ds)==10 + test_iterate_over_examples(array, ds) + test_overrides(ds) + test_getitem(array, ds) + test_subset(array, ds) + test_ds_iterator(array,ds('x','y'),ds('y','z'),ds('x','y','z')) + test_fields_fct(ds) + + +class T_DataSet(unittest.TestCase): + def test_ArrayDataSet(self): + #don't test stream + #tested only with float value + #don't always test with y + #don't test missing value + #don't test with tuple + #don't test proterties + a2 = numpy.random.rand(10,4) + ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested + ds = ArrayDataSet(a2,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + #assert ds==a? should this work? 
+ + test_all(a2,ds) + + del a2, ds + + def test_CachedDataSet(self): + a = numpy.random.rand(10,4) + ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + ds2 = CachedDataSet(ds1) + ds3 = CachedDataSet(ds1,cache_all_upon_construction=True) + + test_all(a,ds2) + test_all(a,ds3) + + del a,ds1,ds2,ds3 + + + def test_DataSetFields(self): + raise NotImplementedError() + + def test_ApplyFunctionDataSet(self): + a = numpy.random.rand(10,4) + a2 = a+1 + ds1 = ArrayDataSet(a,Example(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested + + ds2 = ApplyFunctionDataSet(ds1,lambda x,y,z: (x+1,y+1,z+1), ['x','y','z'],minibatch_mode=False) + ds3 = ApplyFunctionDataSet(ds1,lambda x,y,z: (numpy.array(x)+1,numpy.array(y)+1,numpy.array(z)+1), + ['x','y','z'], + minibatch_mode=True) + + test_all(a2,ds2) + test_all(a2,ds3) + + del a,ds1,ds2,ds3 + + def test_FieldsSubsetDataSet(self): + a = numpy.random.rand(10,4) + ds = ArrayDataSet(a,Example(['x','y','z','w'],[slice(3),3,[0,2],0])) + ds = FieldsSubsetDataSet(ds,['x','y','z']) + + test_all(a,ds) + + del a, ds + + def test_RenamedFieldsDataSet(self): + a = numpy.random.rand(10,4) + ds = ArrayDataSet(a,Example(['x1','y1','z1','w1'],[slice(3),3,[0,2],0])) + ds = RenamedFieldsDataSet(ds,['x1','y1','z1'],['x','y','z']) + + test_all(a,ds) + + del a, ds + + def test_MinibatchDataSet(self): + raise NotImplementedError() + def test_HStackedDataSet(self): + raise NotImplementedError() + def test_VStackedDataSet(self): + raise NotImplementedError() + def test_ArrayFieldsDataSet(self): + raise NotImplementedError() + + +class T_Exotic1(unittest.TestCase): + class DataSet(DataSet): + """ Dummy dataset, where one field is a ndarray of variables size. """ + def __len__(self) : + return 100 + def fieldNames(self) : + return 'input','target','name' + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + class MultiLengthDataSetIterator(object): + def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): + if fieldnames is None: fieldnames = dataset.fieldNames() + self.minibatch = Example(fieldnames,range(len(fieldnames))) + self.dataset, self.minibatch_size, self.current = dataset, minibatch_size, offset + def __iter__(self): + return self + def next(self): + for k in self.minibatch._names : + self.minibatch[k] = [] + for ex in range(self.minibatch_size) : + if 'input' in self.minibatch._names: + self.minibatch['input'].append( numpy.array( range(self.current + 1) ) ) + if 'target' in self.minibatch._names: + self.minibatch['target'].append( self.current % 2 ) + if 'name' in self.minibatch._names: + self.minibatch['name'].append( str(self.current) ) + self.current += 1 + return self.minibatch + return MultiLengthDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) + + def test_ApplyFunctionDataSet(self): + ds = T_Exotic1.DataSet() + dsa = ApplyFunctionDataSet(ds,lambda x,y,z: (x[-1],y*10,int(z)),['input','target','name'],minibatch_mode=False) #broken!!!!!! 
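+        # Each example k of the dummy dataset has 'input' == [0, 1, ..., k],
+        # so the scalar produced by the applied function (x[-1]) must equal
+        # the last element of the original 'input'; the loop below checks
+        # this for every example.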
+ for k in range(len(dsa)): + res = dsa[k] + self.failUnless(ds[k]('input')[0][-1] == res('input')[0] , 'problem in first applied function') + res = dsa[33:96:3] + + def test_CachedDataSet(self): + ds = T_Exotic1.DataSet() + dsc = CachedDataSet(ds) + for k in range(len(dsc)) : + self.failUnless(numpy.all( dsc[k]('input')[0] == ds[k]('input')[0] ) , (dsc[k],ds[k]) ) + res = dsc[:] + +if __name__=='__main__': + tests = [] + debug=False + if len(sys.argv)==1: + unittest.main() + else: + assert sys.argv[1]=="--debug" + for arg in sys.argv[2:]: + tests.append(arg) + if tests: + unittest.TestSuite(map(T_DataSet, tests)).debug() + else: + module = __import__("_test_dataset") + tests = unittest.TestLoader().loadTestsFromModule(module) + tests.debug() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/old_dataset/_test_lookup_list.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/old_dataset/_test_lookup_list.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,24 @@ +from lookup_list import * +import unittest + +class T_LookUpList(unittest.TestCase): + def test_LookupList(self): + #test only the example in the doc??? + example = LookupList(['x','y','z'],[1,2,3]) + example['x'] = [1, 2, 3] # set or change a field + x, y, z = example + x = example[0] + x = example["x"] + assert example.keys()==['x','y','z'] + assert example.values()==[[1,2,3],2,3] + assert example.items()==[('x',[1,2,3]),('y',2),('z',3)] + example.append_keyval('u',0) # adds item with name 'u' and value 0 + assert len(example)==4 # number of items = 4 here + example2 = LookupList(['v','w'], ['a','b']) + example3 = LookupList(['x','y','z','u','v','w'], [[1, 2, 3],2,3,0,'a','b']) + assert example+example2==example3 + self.assertRaises(AssertionError,example.__add__,example) + del example, example2, example3, x, y ,z + +if __name__=='__main__': + unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/old_dataset/dataset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/old_dataset/dataset.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,1533 @@ + +from lookup_list import LookupList as Example +from common.misc import unique_elements_list_intersection +from string import join +from sys import maxint +import numpy, copy + +from exceptions import * + +class AttributesHolder(object): + def __init__(self): pass + + def attributeNames(self): + raise AbstractFunction() + + def setAttributes(self,attribute_names,attribute_values,make_copies=False): + """ + Allow the attribute_values to not be a list (but a single value) if the attribute_names is of length 1. + """ + if len(attribute_names)==1 and not (isinstance(attribute_values,list) or isinstance(attribute_values,tuple) ): + attribute_values = [attribute_values] + if make_copies: + for name,value in zip(attribute_names,attribute_values): + self.__setattr__(name,copy.deepcopy(value)) + else: + for name,value in zip(attribute_names,attribute_values): + self.__setattr__(name,value) + + def getAttributes(self,attribute_names=None, return_copy=False): + """ + Return all (if attribute_names=None, in the order of attributeNames()) or a specified subset of attributes. + """ + if attribute_names is None: + attribute_names = self.attributeNames() + if return_copy: + return [copy.copy(self.__getattribute__(name)) for name in attribute_names] + else: + return [self.__getattribute__(name) for name in attribute_names] + +class DataSet(AttributesHolder): + """A virtual base class for datasets. 
+ + A DataSet can be seen as a generalization of a matrix, meant to be used in conjunction + with learning algorithms (for training and testing them): rows/records are called examples, and + columns/attributes are called fields. The field value for a particular example can be an arbitrary + python object, which depends on the particular dataset. + + We call a DataSet a 'stream' when its length is unbounded (in which case its __len__ method + should return sys.maxint). + + A DataSet is a generator of iterators; these iterators can run through the + examples or the fields in a variety of ways. A DataSet need not necessarily have a finite + or known length, so this class can be used to interface to a 'stream' which + feeds on-line learning (however, as noted below, some operations are not + feasible or not recommended on streams). + + To iterate over examples, there are several possibilities: + - for example in dataset: + - for val1,val2,... in dataset: + - for example in dataset(field1, field2,field3, ...): + - for val1,val2,val3 in dataset(field1, field2,field3): + - for minibatch in dataset.minibatches([field1, field2, ...],minibatch_size=N): + - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N): + Each of these is documented below. All of these iterators are expected + to provide, in addition to the usual 'next()' method, a 'next_index()' method + which returns a non-negative integer pointing to the position of the next + example that will be returned by 'next()' (or of the first example in the + next minibatch returned). This is important because these iterators + can wrap around the dataset in order to do multiple passes through it, + in possibly unregular ways if the minibatch size is not a divisor of the + dataset length. + + To iterate over fields, one can do + - for field in dataset.fields(): + for field_value in field: # iterate over the values associated to that field for all the dataset examples + - for field in dataset(field1,field2,...).fields() to select a subset of fields + - for field in dataset.fields(field1,field2,...) to select a subset of fields + and each of these fields is iterable over the examples: + - for field_examples in dataset.fields(): + for example_value in field_examples: + ... + but when the dataset is a stream (unbounded length), it is not recommended to do + such things because the underlying dataset may refuse to access the different fields in + an unsynchronized ways. Hence the fields() method is illegal for streams, by default. + The result of fields() is a L{DataSetFields} object, which iterates over fields, + and whose elements are iterable over examples. A DataSetFields object can + be turned back into a DataSet with its examples() method:: + dataset2 = dataset1.fields().examples() + and dataset2 should behave exactly like dataset1 (in fact by default dataset2==dataset1). + + Note: Fields are not mutually exclusive, i.e. two fields can overlap in their actual content. + + Note: The content of a field can be of any type. Field values can also be 'missing' + (e.g. to handle semi-supervised learning), and in the case of numeric (numpy array) + fields (i.e. an ArrayFieldsDataSet), NaN plays the role of a missing value. + What about non-numeric values? None. + + Dataset elements can be indexed and sub-datasets (with a subset + of examples) can be extracted. These operations are not supported + by default in the case of streams. + + - dataset[:n] returns an Example with the n first examples. 
+ + - dataset[i1:i2:s] returns an Example with the examples i1,i1+s,...i2-s. + + - dataset[i] returns an Example. + + - dataset[[i1,i2,...in]] returns an Example with examples i1,i2,...in. + + A similar command gives you a DataSet instead of Examples : + + - dataset.subset[:n] returns a DataSet with the n first examples. + + - dataset.subset[i1:i2:s] returns a DataSet with the examples i1,i1+s,...i2-s. + + - dataset.subset[i] returns a DataSet. + + - dataset.subset[[i1,i2,...in]] returns a DataSet with examples i1,i2,...in. + + + - dataset. returns the value of a property associated with + the name . The following properties should be supported: + - 'description': a textual description or name for the dataset + - 'fieldtypes': a list of types (one per field) + A DataSet may have other attributes that it makes visible to other objects. These are + used to store information that is not example-wise but global to the dataset. + The list of names of these attributes is given by the attribute_names() method. + + Datasets can be concatenated either vertically (increasing the length) or + horizontally (augmenting the set of fields), if they are compatible, using + the following operations (with the same basic semantics as numpy.hstack + and numpy.vstack): + + - dataset1 | dataset2 | dataset3 == dataset.hstack([dataset1,dataset2,dataset3]) + + creates a new dataset whose list of fields is the concatenation of the list of + fields of the argument datasets. This only works if they all have the same length. + + - dataset1 & dataset2 & dataset3 == dataset.vstack([dataset1,dataset2,dataset3]) + + creates a new dataset that concatenates the examples from the argument datasets + (and whose length is the sum of the length of the argument datasets). This only + works if they all have the same fields. + + According to the same logic, and viewing a DataSetFields object associated to + a DataSet as a kind of transpose of it, fields1 & fields2 concatenates fields of + a DataSetFields fields1 and fields2, and fields1 | fields2 concatenates their + examples. + + A dataset can hold arbitrary key-value pairs that may be used to access meta-data + or other properties of the dataset or associated with the dataset or the result + of a computation stored in a dataset. These can be accessed through the [key] syntax + when key is a string (or more specifically, neither an integer, a slice, nor a list). + + A DataSet sub-class should always redefine the following methods: + - __len__ if it is not a stream + - fieldNames + - minibatches_nowrap (called by DataSet.minibatches()) + For efficiency of implementation, a sub-class might also want to redefine + - valuesHStack + - valuesVStack + - hasFields + - __getitem__ may not be feasible with some streams + - __iter__ + A sub-class should also append attributes to self._attribute_names + (the default value returned by attributeNames()). + By convention, attributes not in attributeNames() should have a name + starting with an underscore. + @todo enforce/test that convention! 
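+
+    A minimal usage sketch (illustrative only; it assumes a concrete
+    sub-class such as ArrayDataSet with fields 'x' and 'y')::
+
+        for x, y in dataset('x', 'y'):                 # example-wise iteration
+            pass
+        for batch in dataset.minibatches(['x','y'], minibatch_size=5):
+            xs, ys = batch['x'], batch['y']            # list-like containers of 5 values each
+        first = dataset[0]                             # a single Example
+        head = dataset[:10]                            # an Example holding the 10 first examples
+        head_ds = dataset.subset[:10]                  # the same examples, as a DataSet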
+ """ + + numpy_vstack = lambda fieldname,values: numpy.vstack(values) + numpy_hstack = lambda fieldnames,values: numpy.hstack(values) + + def __init__(self, description=None, fieldnames=None, fieldtypes=None): + """ + @type fieldnames: list of strings + @type fieldtypes: list of python types, same length as fieldnames + @type description: string + @param description: description/name for this dataset + """ + def default_desc(): + return type(self).__name__ \ + + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" + + #self.fieldnames = fieldnames + + self.fieldtypes = fieldtypes if fieldtypes is not None \ + else [None]*1 #len(fieldnames) + + self.description = default_desc() if description is None \ + else description + self._attribute_names = ["description"] + + + attributeNames = property(lambda self: copy.copy(self._attribute_names)) + + def __contains__(self, fieldname): + return (fieldname in self.fieldNames()) \ + or (fieldname in self.attributeNames()) + + def __iter__(self): + """Supports the syntax "for i in dataset: ..." + + Using this syntax, "i" will be an Example instance (or equivalent) with + all the fields of DataSet self. Every field of "i" will give access to + a field of a single example. Fields should be accessible via + i["fielname"] or i[3] (in the order defined by the elements of the + Example returned by this iterator), but the derived class is free + to accept any type of identifier, and add extra functionality to the iterator. + + The default implementation calls the minibatches iterator and extracts the first example of each field. + """ + return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) + + def __len__(self): + """ + len(dataset) returns the number of examples in the dataset. + By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). + Sub-classes which implement finite-length datasets should redefine this method. + Some methods only make sense for finite-length datasets. + """ + from sys import maxint + return maxint + + + class MinibatchToSingleExampleIterator(object): + """ + Converts the result of minibatch iterator with minibatch_size==1 into + single-example values in the result. Therefore the result of + iterating on the dataset itself gives a sequence of single examples + (whereas the result of iterating over minibatches gives in each + Example field an iterable object over the individual examples in + the minibatch). + """ + def __init__(self, minibatch_iterator): + self.minibatch_iterator = minibatch_iterator + self.minibatch = None + def __iter__(self): #makes for loop work + return self + def next(self): + size1_minibatch = self.minibatch_iterator.next() + if not self.minibatch: + names = size1_minibatch.keys() + # next lines are a hack, but there was problem when we were getting [array(327)] for instance + try: + values = [value[0] for value in size1_minibatch.values()] + except : + values = [value for value in size1_minibatch.values()] + self.minibatch = Example(names,values) + else: + self.minibatch._values = [value[0] for value in size1_minibatch.values()] + return self.minibatch + + def next_index(self): + return self.minibatch_iterator.next_index() + + class MinibatchWrapAroundIterator(object): + """ + An iterator for minibatches that handles the case where we need to wrap around the + dataset because n_batches*minibatch_size > len(dataset). It is constructed from + a dataset that provides a minibatch iterator that does not need to handle that problem. 
+ This class is a utility for dataset subclass writers, so that they do not have to handle + this issue multiple times, nor check that fieldnames are valid, nor handle the + empty fieldnames (meaning 'use all the fields'). + """ + def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): + self.dataset=dataset + self.fieldnames=fieldnames + self.minibatch_size=minibatch_size + self.n_batches=n_batches + self.n_batches_done=0 + self.next_row=offset + self.L=len(dataset) + self.offset=offset % self.L + ds_nbatches = (self.L-self.next_row)/self.minibatch_size + if n_batches is not None: + ds_nbatches = min(n_batches,ds_nbatches) + if fieldnames: + assert dataset.hasFields(*fieldnames) + else: + self.fieldnames=dataset.fieldNames() + self.iterator = self.dataset.minibatches_nowrap(self.fieldnames,self.minibatch_size, ds_nbatches,self.next_row) + + def __iter__(self): + return self + + def next_index(self): + return self.next_row + + def next(self): + if self.n_batches and self.n_batches_done==self.n_batches: + raise StopIteration + elif not self.n_batches and self.next_row ==self.L: + raise StopIteration + upper = self.next_row+self.minibatch_size + if upper <=self.L: + minibatch = self.iterator.next() + else: + if not self.n_batches: + upper=min(upper, self.L) + # if their is not a fixed number of batch, we continue to the end of the dataset. + # this can create a minibatch that is smaller then the minibatch_size + assert (self.L-self.next_row)<=self.minibatch_size + minibatch = self.dataset.minibatches_nowrap(self.fieldnames,self.L-self.next_row,1,self.next_row).next() + else: + # we must concatenate (vstack) the bottom and top parts of our minibatch + # first get the beginning of our minibatch (top of dataset) + first_part = self.dataset.minibatches_nowrap(self.fieldnames,self.L-self.next_row,1,self.next_row).next() + second_part = self.dataset.minibatches_nowrap(self.fieldnames,upper-self.L,1,0).next() + minibatch = Example(self.fieldnames, + [self.dataset.valuesVStack(name,[first_part[name],second_part[name]]) + for name in self.fieldnames]) + self.next_row=upper + self.n_batches_done+=1 + if upper >= self.L and self.n_batches: + self.next_row -= self.L + ds_nbatches = (self.L-self.next_row)/self.minibatch_size + if self.n_batches is not None: + ds_nbatches = min(self.n_batches,ds_nbatches) + self.iterator = self.dataset.minibatches_nowrap(self.fieldnames,self.minibatch_size, + ds_nbatches,self.next_row) + return DataSetFields(MinibatchDataSet(minibatch,self.dataset.valuesVStack, + self.dataset.valuesHStack), + minibatch.keys()) + + + minibatches_fieldnames = None + minibatches_minibatch_size = 1 + minibatches_n_batches = None + def minibatches(self, + fieldnames = minibatches_fieldnames, + minibatch_size = minibatches_minibatch_size, + n_batches = minibatches_n_batches, + offset = 0): + """ + Return an iterator that supports three forms of syntax: + + for i in dataset.minibatches(None,**kwargs): ... + + for i in dataset.minibatches([f1, f2, f3],**kwargs): ... + + for i1, i2, i3 in dataset.minibatches([f1, f2, f3],**kwargs): ... + + Using the first two syntaxes, "i" will be an indexable object, such as a list, + tuple, or Example instance. In both cases, i[k] is a list-like container + of a batch of current examples. In the second case, i[0] is + list-like container of the f1 field of a batch current examples, i[1] is + a list-like container of the f2 field, etc. + + Using the first syntax, all the fields will be returned in "i". 
+ Using the third syntax, i1, i2, i3 will be list-like containers of the + f1, f2, and f3 fields of a batch of examples on each loop iteration. + + The minibatches iterator is expected to return upon each call to next() + a DataSetFields object, which is a Example (indexed by the field names) whose + elements are iterable and indexable over the minibatch examples, and which keeps a pointer to + a sub-dataset that can be used to iterate over the individual examples + in the minibatch. Hence a minibatch can be converted back to a regular + dataset or its fields can be looked at individually (and possibly iterated over). + + PARAMETERS + - fieldnames (list of any type, default None): + The loop variables i1, i2, i3 (in the example above) should contain the + f1, f2, and f3 fields of the current batch of examples. If None, the + derived class can choose a default, e.g. all fields. + + - minibatch_size (integer, default 1) + On every iteration, the variables i1, i2, i3 will have + exactly minibatch_size elements. e.g. len(i1) == minibatch_size + + @DEPRECATED n_batches : not used anywhere + - n_batches (integer, default None) + The iterator will loop exactly this many times, and then stop. If None, + the derived class can choose a default. If (-1), then the returned + iterator should support looping indefinitely. + + - offset (integer, default 0) + The iterator will start at example 'offset' in the dataset, rather than the default. + + Note: A list-like container is something like a tuple, list, numpy.ndarray or + any other object that supports integer indexing and slicing. + + @ATTENTION: now minibatches returns minibatches_nowrap, which is supposed to return complete + batches only, raise StopIteration. + @ATTENTION: minibatches returns a LookupList, we can't iterate over examples on it. + + """ + #return DataSet.MinibatchWrapAroundIterator(self, fieldnames, minibatch_size, n_batches,offset) + assert offset >= 0 + assert offset < len(self) + assert offset + minibatch_size -1 < len(self) + if fieldnames == None : + fieldnames = self.fieldNames() + return self.minibatches_nowrap(fieldnames,minibatch_size,n_batches,offset) + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + """ + This is the minibatches iterator generator that sub-classes must define. + It does not need to worry about wrapping around multiple times across the dataset, + as this is handled by MinibatchWrapAroundIterator when DataSet.minibatches() is called. + The next() method of the returned iterator does not even need to worry about + the termination condition (as StopIteration will be raised by DataSet.minibatches + before an improper call to minibatches_nowrap's next() is made). + That next() method can assert that its next row will always be within [0,len(dataset)). + The iterator returned by minibatches_nowrap does not need to implement + a next_index() method either, as this will be provided by MinibatchWrapAroundIterator. + """ + raise AbstractFunction() + + def is_unbounded(self): + """ + Tests whether a dataset is unbounded (e.g. a stream). + """ + return len(self)==maxint + + def hasFields(self,*fieldnames): + """ + Return true if the given field name (or field names, if multiple arguments are + given) is recognized by the DataSet (i.e. can be used as a field name in one + of the iterators). + + The default implementation may be inefficient (O(# fields in dataset)), as it calls the fieldNames() + method. 
Many datasets may store their field names in a dictionary, which would allow more efficiency. + """ + return len(unique_elements_list_intersection(fieldnames,self.fieldNames()))>0 + + def fieldNames(self): + """ + Return the list of field names that are supported by the iterators, + and for which hasFields(fieldname) would return True. + """ + raise AbstractFunction() + + def __call__(self,*fieldnames): + """ + Return a dataset that sees only the fields whose name are specified. + """ + assert self.hasFields(*fieldnames) + #return self.fields(*fieldnames).examples() + fieldnames_list = list(fieldnames) + return FieldsSubsetDataSet(self,fieldnames_list) + + def cached_fields_subset(self,*fieldnames) : + """ + Behaviour is supposed to be the same as __call__(*fieldnames), but the dataset returned is cached. + @see : dataset.__call__ + """ + assert self.hasFields(*fieldnames) + return self.fields(*fieldnames).examples() + + def fields(self,*fieldnames): + """ + Return a DataSetFields object associated with this dataset. + """ + return DataSetFields(self,fieldnames) + + def getitem_key(self, fieldname): + """A not-so-well thought-out place to put code that used to be in + getitem. + """ + #removing as per discussion June 4. --JSB + + i = fieldname + # else check for a fieldname + if self.hasFields(i): + return self.minibatches(fieldnames=[i],minibatch_size=len(self),n_batches=1,offset=0).next()[0] + # else we are trying to access a property of the dataset + assert i in self.__dict__ # else it means we are trying to access a non-existing property + return self.__dict__[i] + + def __getitem__(self,i): + """ + @rtype: Example + @returns: single or multiple examples + + @type i: integer or slice or of integers + @param i: + dataset[i] returns the (i+1)-th example of the dataset. + dataset[i:j] returns a LookupList with examples i,i+1,...,j-1. + dataset[i:j:s] returns a LookupList with examples i,i+2,i+4...,j-2. + dataset[[i1,i2,..,in]] returns a LookupList with examples i1,i2,...,in. + + @note: + Some stream datasets may be unable to implement random access, i.e. + arbitrary slicing/indexing because they can only iterate through + examples one or a minibatch at a time and do not actually store or keep + past (or future) examples. + + The default implementation of getitem uses the minibatches iterator + to obtain one example, one slice, or a list of examples. It may not + always be the most efficient way to obtain the result, especially if + the data are actually stored in a memory array. 
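+
+        For instance (illustrative; the indices are arbitrary)::
+
+            one  = dataset[3]              # a single Example
+            some = dataset[[0, 2, 4]]      # one Example stacking examples 0, 2 and 4
+
+        Note that the default implementation only accepts non-negative indices.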
+ """ + + if type(i) is int: + assert i >= 0 # TBM: see if someone complains and want negative i + if i >= len(self) : + raise IndexError + i_batch = self.minibatches_nowrap(self.fieldNames(), + minibatch_size=1, n_batches=1, offset=i) + return DataSet.MinibatchToSingleExampleIterator(i_batch).next() + + #if i is a contiguous slice + if type(i) is slice and (i.step in (None, 1)): + offset = 0 if i.start is None else i.start + upper_bound = len(self) if i.stop is None else i.stop + upper_bound = min(len(self) , upper_bound) + #return MinibatchDataSet(self.minibatches_nowrap(self.fieldNames(), + # minibatch_size=upper_bound - offset, + # n_batches=1, + # offset=offset).next()) + # now returns a LookupList + return self.minibatches_nowrap(self.fieldNames(), + minibatch_size=upper_bound - offset, + n_batches=1, + offset=offset).next() + + # if slice has a step param, convert it to list and handle it with the + # list code + if type(i) is slice: + offset = 0 if i.start is None else i.start + upper_bound = len(self) if i.stop is None else i.stop + upper_bound = min(len(self) , upper_bound) + i = list(range(offset, upper_bound, i.step)) + + # handle tuples, arrays, lists + if hasattr(i, '__getitem__'): + for idx in i: + #dis-allow nested slices + if not isinstance(idx, int): + raise TypeError(idx) + if idx >= len(self) : + raise IndexError + # call back into self.__getitem__ + examples = [self.minibatches_nowrap(self.fieldNames(), + minibatch_size=1, n_batches=1, offset=ii).next() + for ii in i] + # re-index the fields in each example by field instead of by example + field_values = [[] for blah in self.fieldNames()] + for e in examples: + for f,v in zip(field_values, e): + f.append(v) + #build them into a LookupList (a.ka. Example) + zz = zip(self.fieldNames(),field_values) + vst = [self.valuesVStack(fieldname,field_values) for fieldname,field_values in zz] + example = Example(self.fieldNames(), vst) + #return MinibatchDataSet(example, self.valuesVStack, self.valuesHStack) + # now returns a LookupList + return example + + # what in the world is i? + raise TypeError(i, type(i)) + + + """ + Enables the call dataset.subset[a:b:c] that will return a DataSet + around the examples returned by __getitem__(slice(a,b,c)) + + @SEE DataSet.__getsubset(self) + """ + subset = property(lambda s : s.__getsubset(),doc="returns a subset as a DataSet") + + + def __getsubset(self) : + """ + Enables the call data.subset[a:b:c], returns a DataSet. + Default implementation is a simple wrap around __getitem__() using MinibatchDataSet. + + @RETURN DataSet + @SEE DataSet.subset = property(lambda s : s.__getsubset()) + """ + _self = self + class GetSliceReturnsDataSet(object) : + def __getitem__(self,slice) : + return MinibatchDataSet(_self.__getitem__(slice)) + return GetSliceReturnsDataSet() + + + + def valuesHStack(self,fieldnames,fieldvalues): + """ + Return a value that corresponds to concatenating (horizontally) several field values. + This can be useful to merge some fields. The implementation of this operation is likely + to involve a copy of the original values. When the values are numpy arrays, the + result should be numpy.hstack(values). If it makes sense, this operation should + work as well when each value corresponds to multiple examples in a minibatch + e.g. if each value is a Ni-vector and a minibatch of length L is a LxNi matrix, + then the result should be a Lx(N1+N2+..) matrix equal to numpy.hstack(values). 
+ The default is to use numpy.hstack for numpy.ndarray values, and a list + pointing to the original values for other data types. + """ + all_numpy=True + for value in fieldvalues: + if not type(value) is numpy.ndarray: + all_numpy=False + if all_numpy: + return numpy.hstack(fieldvalues) + # the default implementation of horizontal stacking is to put values in a list + return fieldvalues + + def valuesVStack(self,fieldname,values): + """ + @param fieldname: the name of the field from which the values were taken + @type fieldname: any type + + @param values: bits near the beginning or end of the dataset + @type values: list of minibatches (returned by minibatches_nowrap) + + @return: the concatenation (stacking) of the values + @rtype: something suitable as a minibatch field + """ + rval = [] + for v in values: + rval.extend(v) + return rval + + def __or__(self,other): + """ + dataset1 | dataset2 returns a dataset whose list of fields is the concatenation of the list of + fields of the argument datasets. This only works if they all have the same length. + """ + return HStackedDataSet([self,other]) + + def __and__(self,other): + """ + dataset1 & dataset2 is a dataset that concatenates the examples from the argument datasets + (and whose length is the sum of the length of the argument datasets). This only + works if they all have the same fields. + """ + return VStackedDataSet([self,other]) + +def hstack(datasets): + """ + hstack(dataset1,dataset2,...) returns dataset1 | datataset2 | ... + which is a dataset whose fields list is the concatenation of the fields + of the individual datasets. + """ + assert len(datasets)>0 + if len(datasets)==1: + return datasets[0] + return HStackedDataSet(datasets) + +def vstack(datasets): + """ + vstack(dataset1,dataset2,...) returns dataset1 & datataset2 & ... + which is a dataset which iterates first over the examples of dataset1, then + over those of dataset2, etc. + """ + assert len(datasets)>0 + if len(datasets)==1: + return datasets[0] + return VStackedDataSet(datasets) + +class FieldsSubsetDataSet(DataSet): + """ + A sub-class of L{DataSet} that selects a subset of the fields. + """ + def __init__(self,src,fieldnames): + self.src=src + self.fieldnames=fieldnames + assert src.hasFields(*fieldnames) + self.valuesHStack = src.valuesHStack + self.valuesVStack = src.valuesVStack + + def __len__(self): return len(self.src) + + def fieldNames(self): + return self.fieldnames + + def __iter__(self): + class FieldsSubsetIterator(object): + def __init__(self,ds): + self.ds=ds + self.src_iter=ds.src.__iter__() + self.example=None + def __iter__(self): return self + def next(self): + complete_example = self.src_iter.next() + if self.example: + self.example._values=[complete_example[field] + for field in self.ds.fieldnames] + else: + self.example=Example(self.ds.fieldnames, + [complete_example[field] for field in self.ds.fieldnames]) + return self.example + return FieldsSubsetIterator(self) + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + assert self.hasFields(*fieldnames) + return self.src.minibatches_nowrap(fieldnames,minibatch_size,n_batches,offset) + def dontuse__getitem__(self,i): + return FieldsSubsetDataSet(self.src[i],self.fieldnames) + +class RenamedFieldsDataSet(DataSet): + """ + A sub-class of L{DataSet} that selects and renames a subset of the fields. 
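# Sketch of the stacking operators defined above (illustrative; ds1 and ds2 are
# hypothetical DataSets): '|' concatenates the fields of equal-length datasets
# with distinct field names, '&' concatenates the examples of datasets sharing
# the same fields, and hstack()/vstack() are the n-ary forms of the same operations.
wider = ds1 | ds2              # equivalent to hstack([ds1, ds2])
longer = ds1 & ds2             # equivalent to vstack([ds1, ds2])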
+ """ + def __init__(self,src,src_fieldnames,new_fieldnames): + self.src=src + self.src_fieldnames=src_fieldnames + self.new_fieldnames=new_fieldnames + assert src.hasFields(*src_fieldnames) + assert len(src_fieldnames)==len(new_fieldnames) + self.valuesHStack = src.valuesHStack + self.valuesVStack = src.valuesVStack + self.lookup_fields = Example(new_fieldnames,src_fieldnames) + + def __len__(self): return len(self.src) + + def fieldNames(self): + return self.new_fieldnames + + def __iter__(self): + class FieldsSubsetIterator(object): + def __init__(self,ds): + self.ds=ds + self.src_iter=ds.src.__iter__() + self.example=None + def __iter__(self): return self + def next(self): + complete_example = self.src_iter.next() + if self.example: + self.example._values=[complete_example[field] + for field in self.ds.src_fieldnames] + else: + self.example=Example(self.ds.new_fieldnames, + [complete_example[field] + for field in self.ds.src_fieldnames]) + return self.example + return FieldsSubsetIterator(self) + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + assert self.hasFields(*fieldnames) + cursor = Example(fieldnames,[0]*len(fieldnames)) + for batch in self.src.minibatches_nowrap([self.lookup_fields[f] for f in fieldnames],minibatch_size,n_batches,offset): + cursor._values=batch._values + yield cursor + + def __getitem__(self,i): +# return FieldsSubsetDataSet(self.src[i],self.new_fieldnames) + complete_example = self.src[i] + return Example(self.new_fieldnames, + [complete_example[field] + for field in self.src_fieldnames]) + + + +class DataSetFields(Example): + """ + Although a L{DataSet} iterates over examples (like rows of a matrix), an associated + DataSetFields iterates over fields (like columns of a matrix), and can be understood + as a transpose of the associated dataset. + + To iterate over fields, one can do + * for fields in dataset.fields() + * for fields in dataset(field1,field2,...).fields() to select a subset of fields + * for fields in dataset.fields(field1,field2,...) to select a subset of fields + and each of these fields is iterable over the examples: + * for field_examples in dataset.fields(): + for example_value in field_examples: + ... + but when the dataset is a stream (unbounded length), it is not recommended to do + such things because the underlying dataset may refuse to access the different fields in + an unsynchronized ways. Hence the fields() method is illegal for streams, by default. + The result of fields() is a DataSetFields object, which iterates over fields, + and whose elements are iterable over examples. A DataSetFields object can + be turned back into a DataSet with its examples() method: + dataset2 = dataset1.fields().examples() + and dataset2 should behave exactly like dataset1 (in fact by default dataset2==dataset1). + + DataSetFields can be concatenated vertically or horizontally. To be consistent with + the syntax used for DataSets, the | concatenates the fields and the & concatenates + the examples. 
+ """ + def __init__(self,dataset,fieldnames): + original_dataset=dataset + if not fieldnames: + fieldnames=dataset.fieldNames() + elif not list(fieldnames)==list(dataset.fieldNames()): + #we must cast to list, othersize('x','y')!=['x','y'] + dataset = FieldsSubsetDataSet(dataset,fieldnames) + assert dataset.hasFields(*fieldnames) + self.dataset=dataset + + if isinstance(dataset,MinibatchDataSet): + Example.__init__(self,fieldnames,list(dataset._fields)) + elif isinstance(original_dataset,MinibatchDataSet): + Example.__init__(self,fieldnames, + [original_dataset._fields[field] + for field in fieldnames]) + else: + minibatch_iterator = dataset.minibatches(fieldnames, + minibatch_size=len(dataset), + n_batches=1) + minibatch=minibatch_iterator.next() + Example.__init__(self,fieldnames,minibatch) + + def examples(self): + return self.dataset + + def __or__(self,other): + """ + fields1 | fields2 is a DataSetFields that whose list of examples is the concatenation + of the list of examples of DataSetFields fields1 and fields2. + """ + return (self.examples() + other.examples()).fields() + + def __and__(self,other): + """ + fields1 + fields2 is a DataSetFields that whose list of fields is the concatenation + of the fields of DataSetFields fields1 and fields2. + """ + return (self.examples() | other.examples()).fields() + + +class MinibatchDataSet(DataSet): + """ + Turn a L{Example} of same-length (iterable) fields into an example-iterable dataset. + Each element of the lookup-list should be an iterable and sliceable, all of the same length. + """ + def __init__(self,fields_lookuplist,values_vstack=DataSet().valuesVStack, + values_hstack=DataSet().valuesHStack): + """ + The user can (and generally should) also provide values_vstack(fieldname,fieldvalues) + and a values_hstack(fieldnames,fieldvalues) functions behaving with the same + semantics as the DataSet methods of the same name (but without the self argument). 
+ """ + + self._fields=fields_lookuplist + assert len(fields_lookuplist)>0 + self.length=len(fields_lookuplist[0]) + for field in fields_lookuplist[1:]: + if self.length != len(field) : + print 'self.length = ',self.length + print 'len(field) = ', len(field) + print 'self._fields.keys() = ', self._fields.keys() + print 'field=',field + print 'fields_lookuplist=', fields_lookuplist + assert self.length==len(field) + self.valuesVStack=values_vstack + self.valuesHStack=values_hstack + + def __len__(self): + return self.length + + def dontuse__getitem__(self,i): + if type(i) in (slice,list): + return DataSetFields(MinibatchDataSet( + Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames()) + if type(i) is int: + return Example(self._fields.keys(),[field[i] for field in self._fields]) + if self.hasFields(i): + return self._fields[i] + assert i in self.__dict__ # else it means we are trying to access a non-existing property + return self.__dict__[i] + + def fieldNames(self): + return self._fields.keys() + + def hasFields(self,*fieldnames): + for fieldname in fieldnames: + if fieldname not in self._fields.keys(): + return False + return True + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + #@TODO bug somewhere here, fieldnames doesnt seem to be well handled + class Iterator(object): + def __init__(self,ds,fieldnames): + # tbm: added two next lines to handle fieldnames + if fieldnames is None: fieldnames = ds._fields.keys() + self.fieldnames = fieldnames + + self.ds=ds + self.next_example=offset + assert minibatch_size >= 0 + if offset+minibatch_size > ds.length: + raise NotImplementedError() + def __iter__(self): + return self + def next(self): + upper = self.next_example+minibatch_size + if upper > len(self.ds) : + raise StopIteration() + assert upper<=len(self.ds) # instead of self.ds.length + #minibatch = Example(self.ds._fields.keys(), + # [field[self.next_example:upper] + # for field in self.ds._fields]) + # tbm: modif to use fieldnames + values = [] + for f in self.fieldnames : + #print 'we have field',f,'in fieldnames' + values.append( self.ds._fields[f][self.next_example:upper] ) + minibatch = Example(self.fieldnames,values) + #print minibatch + self.next_example+=minibatch_size + return minibatch + + # tbm: added fieldnames to handle subset of fieldnames + return Iterator(self,fieldnames) + +class HStackedDataSet(DataSet): + """ + A L{DataSet} that wraps several datasets and shows a view that includes all their fields, + i.e. whose list of fields is the concatenation of their lists of fields. + + If a field name is found in more than one of the datasets, then either an error is + raised or the fields are renamed (either by prefixing the __name__ attribute + of the dataset + ".", if it exists, or by suffixing the dataset index in the argument list). + + @todo: automatically detect a chain of stacked datasets due to A | B | C | D ... + """ + def __init__(self,datasets,accept_nonunique_names=False,description=None,field_types=None): + DataSet.__init__(self,description,field_types) + self.datasets=datasets + self.accept_nonunique_names=accept_nonunique_names + self.fieldname2dataset={} + + def rename_field(fieldname,dataset,i): + if hasattr(dataset,"__name__"): + return dataset.__name__ + "." 
+ fieldname + return fieldname+"."+str(i) + + # make sure all datasets have the same length and unique field names + self.length=None + names_to_change=[] + for i in xrange(len(datasets)): + dataset = datasets[i] + length=len(dataset) + if self.length: + assert self.length==length + else: + self.length=length + for fieldname in dataset.fieldNames(): + if fieldname in self.fieldname2dataset: # name conflict! + if accept_nonunique_names: + fieldname=rename_field(fieldname,dataset,i) + names2change.append((fieldname,i)) + else: + raise ValueError("Incompatible datasets: non-unique field name = "+fieldname) + self.fieldname2dataset[fieldname]=i + for fieldname,i in names_to_change: + del self.fieldname2dataset[fieldname] + self.fieldname2dataset[rename_field(fieldname,self.datasets[i],i)]=i + + def __len__(self): + return len(self.datasets[0]) + + def hasFields(self,*fieldnames): + for fieldname in fieldnames: + if not fieldname in self.fieldname2dataset: + return False + return True + + def fieldNames(self): + return self.fieldname2dataset.keys() + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + + class HStackedIterator(object): + def __init__(self,hsds,iterators): + self.hsds=hsds + self.iterators=iterators + def __iter__(self): + return self + def next(self): + # concatenate all the fields of the minibatches + l=Example() + for iter in self.iterators: + l.append_lookuplist(iter.next()) + return l + + assert self.hasFields(*fieldnames) + # find out which underlying datasets are necessary to service the required fields + # and construct corresponding minibatch iterators + if fieldnames and fieldnames!=self.fieldNames(): + datasets=set([]) + fields_in_dataset=dict([(dataset,[]) for dataset in datasets]) + for fieldname in fieldnames: + dataset=self.datasets[self.fieldname2dataset[fieldname]] + datasets.add(dataset) + fields_in_dataset[dataset].append(fieldname) + datasets=list(datasets) + iterators=[dataset.minibatches(fields_in_dataset[dataset],minibatch_size,n_batches,offset) + for dataset in datasets] + else: + datasets=self.datasets + iterators=[dataset.minibatches(None,minibatch_size,n_batches,offset) for dataset in datasets] + return HStackedIterator(self,iterators) + + + def untested_valuesVStack(self,fieldname,fieldvalues): + return self.datasets[self.fieldname2dataset[fieldname]].valuesVStack(fieldname,fieldvalues) + + def untested_valuesHStack(self,fieldnames,fieldvalues): + """ + We will use the sub-dataset associated with the first fieldname in the fieldnames list + to do the work, hoping that it can cope with the other values (i.e. won't care + about the incompatible fieldnames). Hence this heuristic will always work if + all the fieldnames are of the same sub-dataset. + """ + return self.datasets[self.fieldname2dataset[fieldnames[0]]].valuesHStack(fieldnames,fieldvalues) + +class VStackedDataSet(DataSet): + """ + A L{DataSet} that wraps several datasets and shows a view that includes all their examples, + in the order provided. This clearly assumes that they all have the same field names + and all (except possibly the last one) are of finite length. + + @todo: automatically detect a chain of stacked datasets due to A + B + C + D ... 
+ """ + def __init__(self,datasets): + self.datasets=datasets + self.length=0 + self.index2dataset={} + assert len(datasets)>0 + fieldnames = datasets[-1].fieldNames() + self.datasets_start_row=[] + # We use this map from row index to dataset index for constant-time random access of examples, + # to avoid having to search for the appropriate dataset each time and slice is asked for. + for dataset,k in enumerate(datasets[0:-1]): + assert dataset.is_unbounded() # All VStacked datasets (except possibly the last) must be bounded (have a length). + L=len(dataset) + for i in xrange(L): + self.index2dataset[self.length+i]=k + self.datasets_start_row.append(self.length) + self.length+=L + assert dataset.fieldNames()==fieldnames + self.datasets_start_row.append(self.length) + self.length+=len(datasets[-1]) + # If length is very large, we should use a more memory-efficient mechanism + # that does not store all indices + if self.length>1000000: + # 1 million entries would require about 60 meg for the index2dataset map + # TODO + print "A more efficient mechanism for index2dataset should be implemented" + + def __len__(self): + return self.length + + def fieldNames(self): + return self.datasets[0].fieldNames() + + def hasFields(self,*fieldnames): + return self.datasets[0].hasFields(*fieldnames) + + def locate_row(self,row): + """Return (dataset_index, row_within_dataset) for global row number""" + dataset_index = self.index2dataset[row] + row_within_dataset = self.datasets_start_row[dataset_index] + return dataset_index, row_within_dataset + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + + class VStackedIterator(object): + def __init__(self,vsds): + self.vsds=vsds + self.next_row=offset + self.next_dataset_index,self.next_dataset_row=self.vsds.locate_row(offset) + self.current_iterator,self.n_left_at_the_end_of_ds,self.n_left_in_mb= \ + self.next_iterator(vsds.datasets[0],offset,n_batches) + + def next_iterator(self,dataset,starting_offset,batches_left): + L=len(dataset) + ds_nbatches = (L-starting_offset)/minibatch_size + if batches_left is not None: + ds_nbatches = max(batches_left,ds_nbatches) + if minibatch_size>L: + ds_minibatch_size=L + n_left_in_mb=minibatch_size-L + ds_nbatches=1 + else: + n_left_in_mb=0 + return dataset.minibatches(fieldnames,minibatch_size,ds_nbatches,starting_offset), \ + L-(starting_offset+ds_nbatches*minibatch_size), n_left_in_mb + + def move_to_next_dataset(self): + if self.n_left_at_the_end_of_ds>0: + self.current_iterator,self.n_left_at_the_end_of_ds,self.n_left_in_mb= \ + self.next_iterator(vsds.datasets[self.next_dataset_index], + self.n_left_at_the_end_of_ds,1) + else: + self.next_dataset_index +=1 + if self.next_dataset_index==len(self.vsds.datasets): + self.next_dataset_index = 0 + self.current_iterator,self.n_left_at_the_end_of_ds,self.n_left_in_mb= \ + self.next_iterator(vsds.datasets[self.next_dataset_index],starting_offset,n_batches) + + def __iter__(self): + return self + + def next(self): + dataset=self.vsds.datasets[self.next_dataset_index] + mb = self.next_iterator.next() + if self.n_left_in_mb: + extra_mb = [] + while self.n_left_in_mb>0: + self.move_to_next_dataset() + extra_mb.append(self.next_iterator.next()) + mb = Example(fieldnames, + [dataset.valuesVStack(name, + [mb[name]]+[b[name] for b in extra_mb]) + for name in fieldnames]) + + self.next_row+=minibatch_size + self.next_dataset_row+=minibatch_size + if self.next_row+minibatch_size>len(dataset): + self.move_to_next_dataset() + return examples + return 
VStackedIterator(self) + +class ArrayFieldsDataSet(DataSet): + """ + Virtual super-class of datasets whose field values are numpy array, + thus defining valuesHStack and valuesVStack for sub-classes. + """ + def __init__(self,description=None,field_types=None): + DataSet.__init__(self,description,field_types) + def untested_valuesHStack(self,fieldnames,fieldvalues): + """Concatenate field values horizontally, e.g. two vectors + become a longer vector, two matrices become a wider matrix, etc.""" + return numpy.hstack(fieldvalues) + def untested_valuesVStack(self,fieldname,values): + """Concatenate field values vertically, e.g. two vectors + become a two-row matrix, two matrices become a longer matrix, etc.""" + return numpy.vstack(values) + + + +class NArraysDataSet(ArrayFieldsDataSet) : + """ + An NArraysDataSet stores fields that are numpy tensor, whose first axis + iterates over examples. It's a generalization of ArrayDataSet. + """ + #@TODO not completely implemented yet + def __init__(self, data_arrays, fieldnames, **kwargs) : + """ + Construct an NArraysDataSet from a list of numpy tensor (data_arrays) and a list + of fieldnames. The number of arrays must be the same as the number of + fieldnames. Each set of numpy tensor must have the same first dimension (first + axis) corresponding to the number of examples. + + Every tensor is treated as a numpy array (using numpy.asarray) + """ + ArrayFieldsDataSet.__init__(self,**kwargs) + assert len(data_arrays) == len(fieldnames) + assert len(fieldnames) > 0 + ndarrays = [numpy.asarray(a) for a in data_arrays] + lens = [a.shape[0] for a in ndarrays] + num_examples = lens[0] #they must all be equal anyway + self._fieldnames = fieldnames + for k in ndarrays : + assert k.shape[0] == num_examples + self._datas = ndarrays + # create dict + self.map_field_idx = dict() + for k in range(len(fieldnames)): + self.map_field_idx[fieldnames[k]] = k + + + def __len__(self) : + """ + Length of the dataset is based on the first array = data_arrays[0], using its shape + """ + return self._datas[0].shape[0] + + def fieldNames(self) : + """ + Returns the fieldnames as set in self.__init__ + """ + return self._fieldnames + + def field_pos(self,fieldname) : + """ + Returns the index of a given fieldname. Fieldname must exists! see fieldNames(). + """ + return self.map_field_idx[fieldname] + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + cursor = Example(fieldnames,[0]*len(fieldnames)) + fieldnames = self.fieldNames() if fieldnames is None else fieldnames + for n in xrange(n_batches): + if offset == len(self): + break + for f in range(len(cursor._names)) : + idx = self.field_pos(cursor._names[f]) + sub_data = self._datas[idx][offset : offset+minibatch_size] + cursor._values[f] = sub_data + offset += len(sub_data) #can be less than minibatch_size at end + yield cursor + + #return ArrayDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) + + + + +class ArrayDataSet(ArrayFieldsDataSet): + """ + An ArrayDataSet stores the fields as groups of columns in a numpy tensor, + whose first axis iterates over examples, second axis determines fields. + If the underlying array is N-dimensional (has N axes), then the field + values are (N-2)-dimensional objects (i.e. ordinary numbers if N=2). + """ + + def __init__(self, data_array, fields_columns, **kwargs): + """ + Construct an ArrayDataSet from the underlying numpy array (data) and + a map (fields_columns) from fieldnames to field columns. 
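# Construction sketch for the NArraysDataSet class above (illustrative): one
# numpy tensor per field, all sharing the same first dimension (the example axis).
import numpy
x = numpy.random.rand(5, 3)
y = numpy.arange(5)
nds = NArraysDataSet([x, y], ['x', 'y'])
assert len(nds) == 5
assert nds.fieldNames() == ['x', 'y']
assert nds.field_pos('y') == 1
# An ArrayDataSet (see the constructor that follows) packs all fields as column
# groups of a single 2-D array instead, e.g.
# ArrayDataSet(numpy.random.rand(5, 4), {'x': slice(0, 3), 'y': [3]}).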
The columns of a field are specified + using the standard arguments for indexing/slicing: integer for a column index, + slice for an interval of columns (with possible stride), or iterable of column indices. + """ + ArrayFieldsDataSet.__init__(self, **kwargs) + self.data=data_array + self.fields_columns=fields_columns + + # check consistency and complete slices definitions + for fieldname, fieldcolumns in self.fields_columns.items(): + if type(fieldcolumns) is int: + assert fieldcolumns>=0 and fieldcolumns=0 and i=self.l: + raise StopIteration + sub_data = self.dataset.data[self.current] + self.minibatch._values = [sub_data[c] for c in self.columns] + + self.current+=1 + return self.minibatch + + return ArrayDataSetIteratorIter(self,self.fieldNames()) + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + cursor = Example(fieldnames,[0]*len(fieldnames)) + fieldnames = self.fieldNames() if fieldnames is None else fieldnames + if n_batches == None: + n_batches = (len(self) - offset) / minibatch_size + for n in xrange(n_batches): + if offset == len(self): + break + sub_data = self.data[offset : offset+minibatch_size] + offset += len(sub_data) #can be less than minibatch_size at end + cursor._values = [sub_data[:,self.fields_columns[f]] for f in cursor._names] + yield cursor + + #return ArrayDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) + + +class CachedDataSet(DataSet): + """ + Wrap a L{DataSet} whose values are computationally expensive to obtain + (e.g. because they involve some computation, or disk access), + so that repeated accesses to the same example are done cheaply, + by caching every example value that has been accessed at least once. + + Optionally, for finite-length dataset, all the values can be computed + (and cached) upon construction of the CachedDataSet, rather at the + first access. + + @todo: when cache_all_upon_construction create mini-batches that are as + large as possible but not so large as to fill up memory. + + @todo: add disk-buffering capability, so that when the cache becomes too + big for memory, we cache things on disk, trying to keep in memory only + the record most likely to be accessed next. + """ + def __init__(self,source_dataset,cache_all_upon_construction=False): + self.source_dataset=source_dataset + self.cache_all_upon_construction=cache_all_upon_construction + self.cached_examples = [] + if cache_all_upon_construction: + # this potentially brings all the source examples + # into memory at once, which may be too much + # the work could possibly be done by minibatches + # that are as large as possible but no more than what memory allows. 
+ # + # field_values is supposed to be an DataSetFields, that inherits from LookupList + #fields_values = source_dataset.minibatches(minibatch_size=len(source_dataset)).__iter__().next() + fields_values = DataSetFields(source_dataset,None) + assert all([len(self)==len(field_values) for field_values in fields_values]) + for example in fields_values.examples(): + self.cached_examples.append(copy.copy(example)) + + self.fieldNames = source_dataset.fieldNames + self.hasFields = source_dataset.hasFields + self.valuesHStack = source_dataset.valuesHStack + self.valuesVStack = source_dataset.valuesVStack + + def __len__(self): + return len(self.source_dataset) + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + class CacheIterator(object): + def __init__(self,dataset): + self.dataset=dataset + self.current=offset + self.all_fields = self.dataset.fieldNames()==fieldnames + self.n_batches = n_batches + self.batch_counter = 0 + def __iter__(self): return self + def next(self): + self.batch_counter += 1 + if self.n_batches and self.batch_counter > self.n_batches : + raise StopIteration() + upper = self.current+minibatch_size + if upper > len(self.dataset.source_dataset): + raise StopIteration() + cache_len = len(self.dataset.cached_examples) + if upper>cache_len: # whole minibatch is not already in cache + # cache everything from current length to upper + #for example in self.dataset.source_dataset[cache_len:upper]: + for example in self.dataset.source_dataset.subset[cache_len:upper]: + self.dataset.cached_examples.append(example) + all_fields_minibatch = Example(self.dataset.fieldNames(), + zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size])) + + self.current+=minibatch_size + if self.all_fields: + return all_fields_minibatch + return Example(fieldnames,[all_fields_minibatch[name] for name in fieldnames]) + return CacheIterator(self) + + def dontuse__getitem__(self,i): + if type(i)==int and len(self.cached_examples)>i: + return self.cached_examples[i] + else: + return self.source_dataset[i] + + def __iter__(self): + class CacheIteratorIter(object): + def __init__(self,dataset): + self.dataset=dataset + self.l = len(dataset) + self.current = 0 + self.fieldnames = self.dataset.fieldNames() + self.example = Example(self.fieldnames,[0]*len(self.fieldnames)) + def __iter__(self): return self + def next(self): + if self.current>=self.l: + raise StopIteration + cache_len = len(self.dataset.cached_examples) + if self.current>=cache_len: # whole minibatch is not already in cache + # cache everything from current length to upper + self.dataset.cached_examples.append( + self.dataset.source_dataset[self.current]) + self.example._values = self.dataset.cached_examples[self.current] + self.current+=1 + return self.example + + return CacheIteratorIter(self) + +class ApplyFunctionDataSet(DataSet): + """ + A L{DataSet} that contains as fields the results of applying a + given function example-wise or minibatch-wise to all the fields of + an input dataset. The output of the function should be an iterable + (e.g. a list or a LookupList) over the resulting values. + + The function take as input the fields of the dataset, not the examples. + + In minibatch mode, the function is expected to work on minibatches + (takes a minibatch in input and returns a minibatch in output). More + precisely, it means that each element of the input or output list + should be iterable and indexable over the individual example values + (typically these elements will be numpy arrays). 
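# Sketch of minibatch-mode application described above (illustrative): `ds` is
# assumed to be a DataSet with a single field 'x' whose minibatch values are
# numpy arrays; the function receives one array per input field and returns
# one array-like per output field, of the same minibatch length.
import numpy
def center(x_batch):
    return (x_batch - x_batch.mean(axis=0),)      # one output field
centered = ApplyFunctionDataSet(ds, center, ['x_centered'])
# Wrapping in a CachedDataSet avoids recomputing the function on repeated access.
cached = CachedDataSet(centered)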
All of the elements + in the input and output lists should have the same length, which is + the length of the minibatch. + + The function is applied each time an example or a minibatch is accessed. + To avoid re-doing computation, wrap this dataset inside a CachedDataSet. + + If the values_{h,v}stack functions are not provided, then + the input_dataset.values{H,V}Stack functions are used by default. + + """ + + def __init__(self,input_dataset,function,output_names,minibatch_mode=True, + values_hstack=None,values_vstack=None, + description=None,fieldtypes=None): + """ + Constructor takes an input dataset that has as many fields as the function + expects as inputs. The resulting dataset has as many fields as the function + produces as outputs, and that should correspond to the number of output names + (provided in a list). + + Note that the expected semantics of the function differs in minibatch mode + (it takes minibatches of inputs and produces minibatches of outputs, as + documented in the class comment). + + TBM: are fieldtypes the old field types (from input_dataset) or the new ones + (for the new dataset created)? + """ + self.input_dataset=input_dataset + self.function=function + self.output_names=output_names + #print 'self.output_names in afds:', self.output_names + #print 'length in afds:', len(self.output_names) + self.minibatch_mode=minibatch_mode + DataSet.__init__(self,description,fieldtypes) + self.valuesHStack = values_hstack if values_hstack else input_dataset.valuesHStack + self.valuesVStack = values_vstack if values_vstack else input_dataset.valuesVStack + + def __len__(self): + return len(self.input_dataset) + + def fieldNames(self): + return self.output_names + + def minibatches_nowrap(self, fieldnames, *args, **kwargs): + all_input_fieldNames = self.input_dataset.fieldNames() + mbnw = self.input_dataset.minibatches_nowrap + + for input_fields in mbnw(all_input_fieldNames, *args, **kwargs): + if self.minibatch_mode: + all_output_fields = self.function(*input_fields) + else: + input_examples = zip(*input_fields) #makes so that [i] means example i + output_examples = [self.function(*input_example) + for input_example in input_examples] + all_output_fields = zip(*output_examples) + + #print 'output_names=', self.output_names + #print 'all_output_fields', all_output_fields + #print 'len(all_output_fields)=', len(all_output_fields) + all_outputs = Example(self.output_names, all_output_fields) + if fieldnames==self.output_names: + rval = all_outputs + else: + rval = Example(fieldnames,[all_outputs[name] for name in fieldnames]) + #print 'rval', rval + #print '--------' + yield rval + + def untested__iter__(self): # only implemented for increased efficiency + class ApplyFunctionSingleExampleIterator(object): + def __init__(self,output_dataset): + self.current=0 + self.output_dataset=output_dataset + self.input_iterator=output_dataset.input_dataset.__iter__() + def __iter__(self): return self + def next(self): + if self.output_dataset.minibatch_mode: + function_inputs = [[input] for input in self.input_iterator.next()] + outputs = self.output_dataset.function(*function_inputs) + assert all([hasattr(output,'__iter__') for output in outputs]) + function_outputs = [output[0] for output in outputs] + else: + function_inputs = self.input_iterator.next() + function_outputs = self.output_dataset.function(*function_inputs) + return Example(self.output_dataset.output_names,function_outputs) + return ApplyFunctionSingleExampleIterator(self) + +def 
supervised_learning_dataset(src_dataset,input_fields,target_fields,weight_field=None): + """ + Wraps an arbitrary L{DataSet} into one for supervised learning tasks + by forcing the user to define a set of fields as the 'input' field + and a set of fields as the 'target' field. Optionally, a single + weight_field can also be defined. + """ + args = ((input_fields,'input'),(output_fields,'target')) + if weight_field: args+=(([weight_field],'weight')) + return src_dataset.merge_fields(*args) + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/old_dataset/learner.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/old_dataset/learner.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,135 @@ + + +from exceptions import * +from dataset import AttributesHolder + +class OfflineLearningAlgorithm(object): + """ + Base class for offline learning algorithms, provides an interface + that allows various algorithms to be applicable to generic learning + algorithms. It is only given here to define the expected semantics. + + An offline learning algorithm can be seen as a function that when + applied to training data returns a learned function (which is an object that + can be applied to other data and return some output data). + + The offline learning scenario is the standard and most common one + in machine learning: an offline learning algorithm is applied + to a training dataset, + + model = learning_algorithm(training_set) + + resulting in a fully trained model that can be applied to another dataset + in order to perform some desired computation: + + output_dataset = model(input_dataset) + + Note that the application of a dataset has no side-effect on the model. + In that example, the training set may for example have 'input' and 'target' + fields while the input dataset may have only 'input' (or both 'input' and + 'target') and the output dataset would contain some default output fields defined + by the learning algorithm (e.g. 'output' and 'error'). The user may specifiy + what the output dataset should contain either by setting options in the + model, by the presence of particular fields in the input dataset, or with + keyword options of the __call__ method of the model (see LearnedModel.__call__). + + """ + + def __init__(self): pass + + def __call__(self, training_dataset): + """ + Return a fully trained TrainedModel. + """ + raise AbstractFunction() + +class TrainedModel(AttributesHolder): + """ + TrainedModel is a base class for models returned by instances of an + OfflineLearningAlgorithm subclass. It is only given here to define the expected semantics. + """ + def __init__(self): + pass + + def __call__(self,input_dataset,output_fieldnames=None, + test_stats_collector=None,copy_inputs=False, + put_stats_in_output_dataset=True, + output_attributes=[]): + """ + A L{TrainedModel} can be used with + with one or more calls to it. The main argument is an input L{DataSet} (possibly + containing a single example) and the result is an output L{DataSet} of the same length. + If output_fieldnames is specified, it may be use to indicate which fields should + be constructed in the output L{DataSet} (for example ['output','classification_error']). + Otherwise, some default output fields are produced (possibly depending on the input + fields available in the input_dataset). + Optionally, if copy_inputs, the input fields (of the input_dataset) can be made + visible in the output L{DataSet} returned by this method. 
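# Usage sketch of the train/apply protocol described above (illustrative;
# `algo` is an instance of some OfflineLearningAlgorithm subclass, and
# training_set / test_set are hypothetical DataSets with the fields it expects).
model = algo(training_set)                                    # offline training
predictions = model(test_set, output_fieldnames=['output'])   # apply the trained model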
+ Optionally, attributes of the learner can be copied in the output dataset, + and statistics computed by the stats collector also put in the output dataset. + Note the distinction between fields (which are example-wise quantities, e.g. 'input') + and attributes (which are not, e.g. 'regularization_term'). + """ + raise AbstractFunction() + + +class OnlineLearningAlgorithm(object): + """ + Base class for online learning algorithms, provides an interface + that allows various algorithms to be applicable to generic online learning + algorithms. It is only given here to define the expected semantics. + + The basic setting is that the training data are only revealed in pieces + (maybe one example or a batch of example at a time): + + model = learning_algorithm() + + results in a fresh model. The model can be adapted by presenting + it with some training data, + + model.update(some_training_data) + ... + model.update(some_more_training_data) + ... + model.update(yet_more_training_data) + + and at any point one can use the model to perform some computation: + + output_dataset = model(input_dataset) + + The model should be a LearnerModel subclass instance, and LearnerModel + is a subclass of LearnedModel. + + """ + + def __init__(self): pass + + def __call__(self, training_dataset=None): + """ + Return a LearnerModel, either fresh (if training_dataset is None) or fully trained (otherwise). + """ + raise AbstractFunction() + +class LearnerModel(TrainedModel): + """ + LearnerModel is a base class for models returned by instances of a LearningAlgorithm subclass. + It is only given here to define the expected semantics. + """ + def __init__(self): + pass + + def update(self,training_set,train_stats_collector=None): + """ + Continue training a learner model, with the evidence provided by the given training set. + Hence update can be called multiple times. This is the main method used for training in the + on-line setting or the sequential (Bayesian or not) settings. + + This function has as side effect that self(data) will behave differently, + according to the adaptation achieved by update(). + + The user may optionally provide a training L{StatsCollector} that is used to record + some statistics of the outputs computed during training. It is update(d) during + training. + """ + raise AbstractFunction() + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/old_dataset/lookup_list.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/old_dataset/lookup_list.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,134 @@ + +from copy import deepcopy + +class LookupList(object): + """ + A LookupList is a sequence whose elements can be named (and unlike + a dictionary the order of the elements depends not on their key but + on the order given by the user through construction) so that + following syntactic constructions work as one would expect:: + >>> example = LookupList(['x','y','z'],[1,2,3]) + >>> example['x'] = [1, 2, 3] # set or change a field + >>> print example('z','y') # prints [3,2] + >>> x, y, z = example + >>> x = example[0] + >>> x = example["x"] + >>> print example.keys() # prints ['x','y','z'] + >>> print example.values() # prints [[1,2,3],2,3] + >>> print example.items() # prints [('x',[1,2,3]),('y',2),('z',3)] + >>> example.append_keyval('u',0) # adds item with name 'u' and value 0 + >>> print len(example) # number of items = 4 here + >>> example2 = LookupList(['v', 'w'], ['a','b']) + >>> print example+example2 # addition is like for lists, a concatenation of the items. 
+ >>> example + example # throw an error as we can't have duplicate name. + + @note: The element names should be unique. + + @todo: Convert this documentation into doctest + format, and actually perform doctest'ing: + U{http://epydoc.sourceforge.net/manual-epytext.html#doctest-blocks} + """ + def __init__(self,names=[],values=[]): + #print 'values=', values + #print 'length=', len(values) + #print 'names=', names + #print 'length=',len(names) + assert len(values)==len(names) + self.__dict__['_values']=values + self.__dict__['_name2index']={} + self.__dict__['_names']=names + for i in xrange(len(values)): + assert names[i] not in self._name2index + self._name2index[names[i]]=i + + def keys(self): + return self._names + + def values(self): + return self._values + + def items(self): + """ + Return a list of (name,value) pairs of all the items in the look-up list. + """ + return zip(self._names,self._values) + + def __getitem__(self,key): + """ + The key in example[key] can either be an integer to index the fields + or the name of the field. + """ + if isinstance(key,int) or isinstance(key,slice) or (isinstance(key,list) and all([isinstance(i,int) for i in key])): + return self._values[key] + else: # if not an int, key must be a name + # expecting key to be a valid field name + assert isinstance(key,str) + return self._values[self._name2index[key]] + + def __setitem__(self,key,value): + if isinstance(key,int): + self._values[key]=value + else: # if not an int, key must be a name + if key in self._name2index: + self._values[self._name2index[key]]=value + else: + self.append_keyval(key,value) + + def append_keyval(self, key, value): + assert key not in self._name2index + self._name2index[key]=len(self) + self._values.append(value) + self._names.append(key) + + def append_lookuplist(self, *list): + for l in list: + for key in l.keys(): + self.append_keyval(key,l[key]) + del l + + def __len__(self): + return len(self._values) + + def __repr__(self): + return "{%s}" % ", ".join([str(k) + "=" + repr(v) for k,v in self.items()]) + + def __add__(self,rhs): + new_example = deepcopy(self) + for item in rhs.items(): + new_example.append_keyval(item[0],item[1]) + return new_example + + def __radd__(self,lhs): + new_example = deepcopy(lhs) + for item in self.items(): + new_example.append_keyval(item[0],item[1]) + return new_example + + def __eq__(self, other): + return self._values==other._values and self._name2index==other._name2index and self._names==other._names + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + raise NotImplementedError() + + def __call__(self,*names): + """ + Return a list of values associated with the given names (which must all be keys of the lookup list). + """ + if names == self._names: + return self._values + return [self[name] for name in names] + + +if __name__ == '__main__': + + a=LookupList(['a'],[1]) + print a + b=LookupList(['b'],[2]) + print b + a.append_lookuplist(b) + print a + a.append_lookuplist(b) + print a diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/README.txt Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,1 @@ +Stuff in the sandbox may be very broken and/or in flux. 
diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/_test_random_transformation.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/_test_random_transformation.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,84 @@ +from random_transformation import row_random_transformation + +import unittest +from theano import compile +from theano import gradient + +from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result +from theano.sparse import _mtypes, _mtype_to_str +from theano.sparse import as_sparse + +from theano.tensor import as_tensor +from theano.scalar import as_scalar + +import random +import numpy.random + +class T_RowRandomTransformation(unittest.TestCase): + def setUp(self): + random.seed(44) + numpy.random.seed(44) + + def test_basic(self): + rows = 4 + cols = 20 + fakeseed = 0 + length = 3 + md = numpy.random.rand(rows, cols) + for mtype in _mtypes: + m = as_sparse(mtype(md)) + o = row_random_transformation(m, length, initial_seed=fakeseed) + y = compile.eval_outputs([o]) + expected = "[[ 0.88239119 1.03244463 -1.29297503]\n [ 0.02644961 1.50119695 -0.025081 ]\n [-0.60741013 1.25424625 0.30119422]\n [-1.08659967 -0.35531544 -1.38915467]]" + self.failUnless(str(y) == expected) + + def test_length(self): + """ Test that if length is increased, we obtain the same results + (except longer). """ + + for i in range(10): + mtype = random.choice(_mtypes) + rows = random.randint(1, 20) + cols = random.randint(1, 20) + fakeseed = random.randint(0, 100) + length = random.randint(1, 10) + extralength = random.randint(1, 10) + + m = as_sparse(mtype(numpy.random.rand(rows, cols))) + o1 = row_random_transformation(m, length, initial_seed=fakeseed) + o2 = row_random_transformation(m, length + extralength, initial_seed=fakeseed) + + y1 = compile.eval_outputs([o1]) + y2 = compile.eval_outputs([o2]) + + self.failUnless((y1 == y2[:,:length]).all()) + + def test_permute(self): + """ Test that if the order of the rows is permuted, we obtain the same results. """ + for i in range(10): + mtype = random.choice(_mtypes) + rows = random.randint(2, 20) + cols = random.randint(1, 20) + fakeseed = random.randint(0, 100) + length = random.randint(1, 10) + + permute = numpy.random.permutation(rows) + + + m1 = numpy.random.rand(rows, cols) + m2 = m1[permute] + for r in range(rows): + self.failUnless((m2[r] == m1[permute[r]]).all()) + s1 = as_sparse(mtype(m1)) + s2 = as_sparse(mtype(m2)) + o1 = row_random_transformation(s1, length, initial_seed=fakeseed) + o2 = row_random_transformation(s2, length, initial_seed=fakeseed) + y1 = compile.eval_outputs([o1]) + y2 = compile.eval_outputs([o2]) + + self.failUnless(y1.shape == y2.shape) + for r in range(rows): + self.failUnless((y2[r] == y1[permute[r]]).all()) + +if __name__ == '__main__': + unittest.main() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/denoising_aa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/denoising_aa.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,224 @@ +""" +A denoising auto-encoder + +@warning: You should use this interface. It is not complete and is not functional. 
+Instead, use:: + ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa +""" + +import theano +from theano.formula import * +from learner import * +from theano import tensor as t +from nnet_ops import * +import math +from misc import * +from misc_theano import * +from theano.tensor_random import binomial + +def hiding_corruption_formula(seed,average_fraction_hidden): + """ + Return a formula for the corruption process, in which a random + subset of the input numbers are hidden (mapped to 0). + + @param seed: seed of the random generator + @type seed: anything that numpy.random.RandomState accepts + + @param average_fraction_hidden: the probability with which each + input number is hidden (set to 0). + @type average_fraction_hidden: 0 <= real number <= 1 + """ + class HidingCorruptionFormula(Formulas): + x = t.matrix() + corrupted_x = x * binomial(seed,x,1,fraction_sampled) + + return HidingCorruptionFormula() + +def squash_affine_formula(squash_function=sigmoid): + """ + Simply does: squash_function(b + xW) + By convention prefix the parameters by _ + """ + class SquashAffineFormula(Formulas): + x = t.matrix() # of dimensions minibatch_size x n_inputs + _b = t.row() # of dimensions 1 x n_outputs + _W = t.matrix() # of dimensions n_inputs x n_outputs + a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs + y = squash_function(a) + return SquashAffineFormula() + +def gradient_descent_update_formula(): + class GradientDescentUpdateFormula(Formula): + param = t.matrix() + learning_rate = t.scalar() + cost = t.column() # cost of each example in a minibatch + param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost)) + return gradient_descent_update_formula() + +def probabilistic_classifier_loss_formula(): + class ProbabilisticClassifierLossFormula(Formulas): + a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output + target_class = t.ivector() # dimension (minibatch_size) + nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py + return ProbabilisticClassifierLossFormula() + +def binomial_cross_entropy_formula(): + class BinomialCrossEntropyFormula(Formulas): + a = t.matrix() # pre-sigmoid activations, minibatch_size x dim + p = sigmoid(a) # model prediction + q = t.matrix() # target binomial probabilities, minibatch_size x dim + # using the identity softplus(a) - softplus(-a) = a, + # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a) + nll = -t.sum(q*a - softplus(-a)) + # next line was missing... hope it's all correct above + return BinomialCrossEntropyFormula() + +def squash_affine_autoencoder_formula(hidden_squash=t.tanh, + reconstruction_squash=sigmoid, + share_weights=True, + reconstruction_nll_formula=binomial_cross_entropy_formula(), + update_formula=gradient_descent_update_formula): + if share_weights: + autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a') + \ + squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \ + reconstruction_nll_formula + else: + autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',_W='_W1') + \ + squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \ + reconstruction_nll_formula + autoencoder = autoencoder + [update_formula().rename(cost = 'nll', + param = p) + for p in autoencoder.get_all('_.*')] + return autoencoder + + +# @todo: try other corruption formulae. The above is the default one. +# not quite used in the ICML paper... 
(had a fixed number of 0s). + +class DenoisingAutoEncoder(LearningAlgorithm): + + def __init__(self,n_inputs,n_hidden_per_layer, + learning_rate=0.1, + max_n_epochs=100, + L1_regularizer=0, + init_range=1., + corruption_formula = hiding_corruption_formula(), + autoencoder = squash_affine_autoencoder_formula(), + minibatch_size=None,linker = "c|py"): + for name,val in locals().items(): + if val is not self: self.__setattribute__(name,val) + self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x') + + def __call__(self, training_set=None): + """ Allocate and optionnaly train a model + + @TODO enables passing in training and valid sets, instead of cutting one set in 80/20 + """ + model = DenoisingAutoEncoderModel(self) + if training_set: + print 'DenoisingAutoEncoder(): what do I do if training_set????' + # copied from old mlp_factory_approach: + if len(trainset) == sys.maxint: + raise NotImplementedError('Learning from infinite streams is not supported') + nval = int(self.validation_portion * len(trainset)) + nmin = len(trainset) - nval + assert nmin >= 0 + minset = trainset[:nmin] #real training set for minimizing loss + valset = trainset[nmin:] #validation set for early stopping + best = model + for stp in self.early_stopper(): + model.update( + minset.minibatches([input, target], minibatch_size=min(32, + len(trainset)))) + #print 'mlp.__call__(), we did an update' + if stp.set_score: + stp.score = model(valset, ['loss_01']) + if (stp.score < stp.best_score): + best = copy.copy(model) + model = best + # end of the copy from mlp_factory_approach + + return model + + + def compile(self, inputs, outputs): + return theano.function(inputs,outputs,unpack_single=False,linker=self.linker) + +class DenoisingAutoEncoderModel(LearnerModel): + def __init__(self,learning_algorithm,params): + self.learning_algorithm=learning_algorithm + self.params=params + v = learning_algorithm.v + self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs, + learning_algorithm.denoising_autoencoder_formula.outputs) + + def update(self, training_set, train_stats_collector=None): + + print 'dont update you crazy frog!' 
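# Plain-numpy sketch of the hiding corruption described by
# hiding_corruption_formula above (illustrative, outside of Theano): each input
# value is zeroed independently with probability average_fraction_hidden.
import numpy
def hide_inputs(x, average_fraction_hidden, rng=numpy.random):
    keep = rng.binomial(1, 1.0 - average_fraction_hidden, size=x.shape)
    return x * keep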
+ +# old stuff + +# self._learning_rate = t.scalar('learning_rate') # this is the symbol +# self.L1_regularizer = L1_regularizer +# self._L1_regularizer = t.scalar('L1_regularizer') +# self._input = t.matrix('input') # n_examples x n_inputs +# self._W = t.matrix('W') +# self._b = t.row('b') +# self._c = t.row('b') +# self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W)) +# self._corrupted_input = corruption_process(self._input) +# self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T)) +# self._reconstruction_activations =self._c+t.dot(self._hidden,self._W) +# self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector) +# self._output_class = t.argmax(self._output,1) +# self._class_error = t.neq(self._output_class,self._target_vector) +# self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0] +# OnlineGradientTLearner.__init__(self) + +# def attributeNames(self): +# return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"] + +# def parameterAttributes(self): +# return ["b1","W1", "b2", "W2"] + +# def updateMinibatchInputFields(self): +# return ["input","target"] + +# def updateEndOutputAttributes(self): +# return ["regularization_term"] + +# def lossAttribute(self): +# return "minibatch_criterion" + +# def defaultOutputFields(self, input_fields): +# output_fields = ["output", "output_class",] +# if "target" in input_fields: +# output_fields += ["class_error", "nll"] +# return output_fields + +# def allocate(self,minibatch): +# minibatch_n_inputs = minibatch["input"].shape[1] +# if not self._n_inputs: +# self._n_inputs = minibatch_n_inputs +# self.b1 = numpy.zeros((1,self._n_hidden)) +# self.b2 = numpy.zeros((1,self._n_outputs)) +# self.forget() +# elif self._n_inputs!=minibatch_n_inputs: +# # if the input changes dimension on the fly, we resize and forget everything +# self.forget() + +# def forget(self): +# if self._n_inputs: +# r = self._init_range/math.sqrt(self._n_inputs) +# self.W1 = numpy.random.uniform(low=-r,high=r, +# size=(self._n_hidden,self._n_inputs)) +# r = self._init_range/math.sqrt(self._n_hidden) +# self.W2 = numpy.random.uniform(low=-r,high=r, +# size=(self._n_outputs,self._n_hidden)) +# self.b1[:]=0 +# self.b2[:]=0 +# self._n_epochs=0 + +# def isLastEpoch(self): +# self._n_epochs +=1 +# return self._n_epochs>=self._max_n_epochs diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/gradient_learner.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/gradient_learner.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,71 @@ + +from learner import * +from tensor import * +import gradient +from compile import Function + +class GradientLearner(Learner): + """ + Base class for gradient-based optimization of a training criterion + that can consist in two parts, an additive part over examples, and + an example-independent part (usually called the regularizer). + The user provides a Theano formula that maps the fields of a minibatch (each being a tensor with the + same number of rows = minibatch size) and parameters to output fields (for the use function), one of which + must be a cost that is the training criterion to be minimized. Subclasses implement + a training strategy that uses the Theano formula to compute gradients and + to compute outputs in the update method. + The inputs, parameters, and outputs are lists of Theano tensors, + while the example_wise_cost and regularization_term are Theano tensors. 
+ The user can specify a regularization coefficient that multiplies the regularization term. + The training algorithm looks for parameters that minimize + regularization_coefficient * regularization_term(parameters) + + sum_{inputs in training_set} example_wise_cost(inputs,parameters) + i.e. the regularization_term should not depend on the inputs, only on the parameters. + The learned function can map a subset of inputs to a subset of outputs (as long as the inputs subset + includes all the inputs required in the Theano expression for the selected outputs). + It is assumed that all the inputs are provided in the training set (as dataset fields + with the corresponding name), but not necessarily when using the learned function. + """ + def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0), + regularization_coefficient = astensor(1.0)): + self.inputs = inputs + self.outputs = outputs + self.parameters = parameters + self.example_wise_cost = example_wise_cost + self.regularization_term = regularization_term + self.regularization_coefficient = regularization_coefficient + self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters) + self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters) + if example_wise_cost not in outputs: + outputs.append(example_wise_cost) + if regularization_term not in outputs: + outputs.append(regularization_term) + self.example_wise_gradient_fn = Function(inputs + parameters, + [self.parameters_example_wise_gradient + self.parameters_regularization_gradient]) + self.use_functions = {frozenset([input.name for input in inputs]+[output.name for output in outputs]) + : Function(inputs, outputs)} + + def use(self,input_dataset,output_fields=None,copy_inputs=True): + # obtain the function that maps the desired inputs to desired outputs + input_fields = input_dataset.fieldNames() + # map names of input fields to Theano tensors in self.inputs + input_variables = ??? + if output_fields is None: output_fields = [output.name for output in outputs] + # handle special case of inputs that are directly copied into outputs + # map names of output fields to Theano tensors in self.outputs + output_variables = ??? + use_function_key = input_fields+output_fields + if not self.use_functions.has_key(use_function_key): + self.use_function[use_function_key]=Function(input_variables,output_variables) + use_function = self.use_functions[use_function_key] + # return a dataset that computes the outputs + return input_dataset.apply_function(use_function,input_fields,output_fields,copy_inputs,compute_now=True) + + +class StochasticGradientDescent(object): + def update_parameters(self): + +class StochasticGradientLearner(GradientLearner,StochasticGradientDescent): + def __init__(self,inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0), + regularization_coefficient = astensor(1.0),) + def update() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/image_tools.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/image_tools.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,39 @@ + +import numpy + + +def make_weights_image(mat, xres, yres, i, j, nrow, ncol): + """ + Displays the filters implemented by a weight matrix. + + Each filter corresponds to a row of mat and will be represented + by a xres*yres image. + + Units from i to j will be included in the picture. 
+ + The picture will have nrow rows of filters and ncol columns + of filters. Unused spots for filters will be filled with zeros. + + The return value is a matrix suitable for display with + matplotlib's imshow. + """ + + assert j > i + n = j - i + result = numpy.zeros((ncol * xres, nrow * yres)) + submat = mat[i:j] + for k, row in enumerate(submat): + x = (k % ncol)*xres + y = (k / ncol)*yres + entry = row.reshape((xres, yres)) + lmin, lmax = numpy.min(entry), numpy.max(entry) + ldiff = lmax - lmin + #entry = (entry - lmin) / ldiff + result[x:x + xres, y:y + yres] = entry + return result.T + + + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/random_transformation.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/random_transformation.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,132 @@ +""" +New L{Op}s that aren't in core theano +""" + +from theano import sparse +from theano import tensor +from theano import scalar +from theano.gof import op + +from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result + +import scipy.sparse + +import numpy + +class RowRandomTransformation(op.Op): + """ + Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we + multiply it by a deterministic random matrix of shape (dimensions, + length) to obtain random transformation output of shape (exmpls, + length). + + Each element of the deterministic random matrix is selected uniformly + from [-1, +1). + @todo: Use another random distribution? + + @note: This function should be written such that if length is + increased, we obtain the same results (except longer). Similarly, + the rows should be able to be permuted and get the same result in + the same fashion. + + @todo: This may be slow? + @todo: Rewrite for dense matrices too? + @todo: Is there any way to verify the convention that each row is + an example? Should I rename the variables in the code to make the + semantics more explicit? + @todo: AUTOTEST: Autotest that dense and spare versions of this are identical. + @todo: Rename? Is Row the correct name? Maybe column-wise? + + @type x: L{scipy.sparse.spmatrix} + @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions) + @type length: int + @param length: The number of transformations of C{x} to be performed. + @param initial_seed: Initial seed for the RNG. + @rtype: L{numpy.ndarray} + @return: Array with C{length} random transformations, with shape (exmpls, length) + """ + + import random + """ + RNG used for random transformations. + Does not share state with rest of program. + @todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic. + """ + _trng = random.Random() + + def __init__(self, x, length, initial_seed=0, **kwargs): + """ + @todo: Which broadcastable values should I use? + """ + assert 0 # Needs to be updated to Olivier's new Op creation approach + op.Op.__init__(self, **kwargs) + x = sparse.as_sparse(x) + self.initial_seed = initial_seed + self.length = length + self.inputs = [x] + self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])] +# self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])] + + def _random_matrix_value(self, row, col, rows): + """ + From a deterministic random matrix, find one element. + @param row: The row of the element to be read. + @param col: The column of the element to be read. + @param row: The number of rows in the matrix. 
+ @type row: int + @type col: int + @type rows: int + @note: This function is designed such that if we extend + the number of columns in the random matrix, the values of + the earlier entries is unchanged. + @todo: Make this static + """ + # Choose the random entry at (l, c) + rngidx = col * rows + row + # Set the random number state for this random entry + # Note: This may be slow + self._trng.seed(rngidx + self.initial_seed) + + # Determine the value for this entry + val = self._trng.uniform(-1, +1) +# print "Exmpl #%d, dimension #%d => Random projection #%d has idx %d (+ seed %d) and value %f" % (r, c, j, rngidx, self.initial_seed, val) + return val + + def impl(self, xorig): + assert _is_sparse(xorig) + assert len(xorig.shape) == 2 + # Since conversions to and from the COO format are quite fast, you + # can use this approach to efficiently implement lots computations + # on sparse matrices. + x = xorig.tocoo() + (rows, cols) = x.shape + tot = rows * cols + out = numpy.zeros((rows, self.length)) +# print "l = %d" % self.length +# print "x.getnnz() = %d" % x.getnnz() + all = zip(x.col, x.row, x.data) + all.sort() # TODO: Maybe this is very slow? + lastc = None + lastl = None + lastval = None + for l in range(self.length): + for (c, r, data) in all: + assert c < cols + assert r < rows + if not c == lastc or not l == lastl: + lastc = c + lastl = l + lastval = self._random_matrix_value(c, l, cols) + val = lastval +# val = self._random_matrix_value(c, l, cols) +# val = self._trng.uniform(-1, +1) +# val = 1.0 + out[r][l] += val * data + return out + def __copy__(self): + return self.__class__(self.inputs[0], self.length, self.initial_seed) + def clone_with_new_inputs(self, *new_inputs): + return self.__class__(new_inputs[0], self.length, self.initial_seed) + def desc(self, *new_inputs): + return (self.__class__, self.length, self.initial_seed) +row_random_transformation = RowRandomTransformation() diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/rbm/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/rbm/README.txt Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,4 @@ +An RBM with binomial units trained with CD-1. +by Joseph Turian + +This seems to work fine. diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/rbm/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/rbm/main.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/rbm/main.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,26 @@ +#!/usr/bin/python +""" +Simple SGD RBM training. +(An example of how to use the model.) +""" + + +import numpy + +nonzero_instances = [] +#nonzero_instances.append({0: 1, 1: 1}) +#nonzero_instances.append({0: 1, 2: 1}) + +nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) +nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) +nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) + +import model +model = model.Model(input_dimension=10, hidden_dimension=6) + +for i in xrange(100000): + # Select an instance + instance = nonzero_instances[i % len(nonzero_instances)] + + # SGD update over instance + model.update([instance]) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/rbm/model.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/rbm/model.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,139 @@ +""" +The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason +Weston's sampling trick (2008). 
+""" + +import parameters + +import numpy +from numpy import dot +import random + +import pylearn.nnet_ops +import pylearn.sparse_instance + +def sigmoid(v): + """ + @todo: Move to pylearn.more_numpy + @todo: Fix to avoid floating point overflow. + """ +# if x < -30.0: return 0.0 +# if x > 30.0: return 1.0 + return 1.0 / (1.0 + numpy.exp(-v)) + +def sample(v): + """ + @todo: Move to pylearn.more_numpy + """ + assert len(v.shape) == 2 + x = numpy.zeros(v.shape) + for j in range(v.shape[0]): + for i in range(v.shape[1]): + assert v[j][i] >= 0 and v[j][i] <= 1 + if random.random() < v[j][i]: x[j][i] = 1 + else: x[j][i] = 0 + return x + +def crossentropy(output, target): + """ + Compute the crossentropy of binary output wrt binary target. + @note: We do not sum, crossentropy is computed by component. + @todo: Rewrite as a scalar, and then broadcast to tensor. + @todo: Move to pylearn.more_numpy + @todo: Fix to avoid floating point overflow. + """ + return -(target * numpy.log(output) + (1 - target) * numpy.log(1 - output)) + + +class Model: + """ + @todo: input dimensions should be stored here! not as a global. + """ + def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, momentum = 0.9, weight_decay = 0.0002, random_seed = 666): + self.input_dimension = input_dimension + self.hidden_dimension = hidden_dimension + self.learning_rate = learning_rate + self.momentum = momentum + self.weight_decay = weight_decay + self.random_seed = random_seed + + random.seed(random_seed) + + self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed) + self.prev_dw = 0 + self.prev_db = 0 + self.prev_dc = 0 + + def deterministic_reconstruction(self, v0): + """ + One up-down cycle, but a mean-field approximation (no sampling). + """ + q = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) + p = sigmoid(self.parameters.c + dot(q, self.parameters.w.T)) + return p + + def deterministic_reconstruction_error(self, v0): + """ + @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)). + """ + return crossentropy(self.deterministic_reconstruction(v0), v0) + + def update(self, instances): + """ + Update the L{Model} using one training instance. + @param instance: A dict from feature index to (non-zero) value. + @todo: Should assert that nonzero_indices and zero_indices + are correct (i.e. are truly nonzero/zero). + @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing? + @todo: Decay the biases too? 
+ """ + minibatch = len(instances) + v0 = pylearn.sparse_instance.to_vector(instances, self.input_dimension) + print "old XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch + q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) + h0 = sample(q0) + p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) + v1 = sample(p0) + q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) + + dw = self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + self.momentum * self.prev_dw + db = self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch + self.momentum * self.prev_db + dc = self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch + self.momentum * self.prev_dc + + self.parameters.w *= (1 - self.weight_decay) + + self.parameters.w += dw + self.parameters.b += db + self.parameters.c += dc + + self.last_dw = dw + self.last_db = db + self.last_dc = dc + + print "new XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch + +# print +# print "v[0]:", v0 +# print "Q(h[0][i] = 1 | v[0]):", q0 +# print "h[0]:", h0 +# print "P(v[1][j] = 1 | h[0]):", p0 +# print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0)) +# print "v[1]:", v1 +# print "Q(h[1][i] = 1 | v[1]):", q1 +# +# print +# print v0.T.shape +# print h0.shape +# print dot(v0.T, h0).shape +# print self.parameters.w.shape +# self.parameters.w += self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch +# print +# print h0.shape +# print q1.shape +# print self.parameters.b.shape +# self.parameters.b += self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch +# print v0.shape, v1.shape +# print +# print self.parameters.c.shape +# self.parameters.c += self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch +# print self.parameters diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/rbm/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/rbm/parameters.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,32 @@ +""" +Parameters (weights) used by the L{Model}. +""" + +import numpy + +class Parameters: + """ + Parameters used by the L{Model}. + """ + def __init__(self, input_dimension, hidden_dimension, randomly_initialize, random_seed): + """ + Initialize L{Model} parameters. + @param randomly_initialize: If True, then randomly initialize + according to the given random_seed. If False, then just use zeroes. + """ + if randomly_initialize: + numpy.random.random_seed(random_seed) + self.w = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension + self.b = numpy.zeros((1, hidden_dimension)) + self.c = numpy.zeros((1, input_dimension)) + else: + self.w = numpy.zeros((input_dimension, hidden_dimension)) + self.b = numpy.zeros((1, hidden_dimension)) + self.c = numpy.zeros((1, input_dimension)) + + def __str__(self): + s = "" + s += "w: %s\n" % self.w + s += "b: %s\n" % self.b + s += "c: %s\n" % self.c + return s diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/simple_autoassociator/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/simple_autoassociator/README.txt Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,5 @@ +This seems to work. + +@todo: + * Add momentum. + * Add learning rate decay schedule. 
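# --- Editor's sketch (not part of the patch) ---------------------------------
# Two small notes on rbm/model.py and rbm/parameters.py above: update() reads
# its momentum terms from self.prev_dw/prev_db/prev_dc but stores the new
# deltas in self.last_dw/last_db/last_dc, so momentum never takes effect after
# the first step; and numpy.random.random_seed(random_seed) in parameters.py is
# not a numpy function -- numpy.random.seed(random_seed) is presumably intended.
# The update itself is CD-1: positive statistics from (v0, h0), negative
# statistics from the one-step reconstruction (v1, q1).  A condensed numpy
# restatement of that rule with the momentum bookkeeping fixed; the helper
# names are local to this sketch:

import numpy

def _sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

def _sample(p):
    return (numpy.random.rand(*p.shape) < p).astype(p.dtype)

def cd1_step(w, b, c, v0, lr=0.1, momentum=0.9, weight_decay=0.0002,
             prev=(0.0, 0.0, 0.0)):
    n = v0.shape[0]
    q0 = _sigmoid(b + numpy.dot(v0, w))        # P(h = 1 | v0)
    h0 = _sample(q0)
    p0 = _sigmoid(c + numpy.dot(h0, w.T))      # P(v = 1 | h0)
    v1 = _sample(p0)                           # one-step reconstruction
    q1 = _sigmoid(b + numpy.dot(v1, w))        # P(h = 1 | v1)
    dw = lr * (numpy.dot(v0.T, h0) - numpy.dot(v1.T, q1)) / n + momentum * prev[0]
    db = lr * numpy.sum(h0 - q1, axis=0) / n + momentum * prev[1]
    dc = lr * numpy.sum(v0 - v1, axis=0) / n + momentum * prev[2]
    w *= (1.0 - weight_decay)
    w += dw; b += db; c += dc
    return dw, db, dc                          # pass back in as `prev` next call
# ------------------------------------------------------------------------------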
diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/simple_autoassociator/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/simple_autoassociator/graph.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/simple_autoassociator/graph.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,26 @@ +""" +Theano graph for a simple autoassociator. +@todo: Make nearly everything private. +""" + +from pylearn.nnet_ops import sigmoid, binary_crossentropy +from theano import tensor as t +from theano.tensor import dot +x = t.dmatrix() +w1 = t.dmatrix() +b1 = t.dvector() +w2 = t.dmatrix() +b2 = t.dvector() +h = sigmoid(dot(x, w1) + b1) +y = sigmoid(dot(h, w2) + b2) + +loss_unsummed = binary_crossentropy(y, x) +loss = t.sum(loss_unsummed) + +(gw1, gb1, gw2, gb2) = t.grad(loss, [w1, b1, w2, b2]) + +import theano.compile + +inputs = [x, w1, b1, w2, b2] +outputs = [y, h, loss, gw1, gb1, gw2, gb2] +trainfn = theano.compile.function(inputs, outputs) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/simple_autoassociator/main.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/simple_autoassociator/main.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/python +""" + A simple autoassociator. + + The learned model is:: + h = sigmoid(dot(x, w1) + b1) + y = sigmoid(dot(h, w2) + b2) + + Binary xent loss. +""" + + +import numpy + +nonzero_instances = [] +nonzero_instances.append({0: 1, 1: 1}) +nonzero_instances.append({0: 1, 2: 1}) + +#nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) +#nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) +##nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) + +import model +model = model.Model(input_dimension=10, hidden_dimension=4) + +for i in xrange(100000): +# # Select an instance +# instance = nonzero_instances[i % len(nonzero_instances)] + + # Update over instance + model.update(nonzero_instances) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/simple_autoassociator/model.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/simple_autoassociator/model.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,71 @@ +""" +The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason +Weston's sampling trick (2008). +""" + +from graph import trainfn +import parameters + +import numpy +import random + +import pylearn.sparse_instance + +class Model: + """ + @todo: Add momentum. + @todo: Add learning rate decay schedule. + """ + def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, weight_decay = 0.0002, random_seed = 666): + self.input_dimension = input_dimension + self.hidden_dimension = hidden_dimension + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.random_seed = random_seed + + random.seed(random_seed) + + self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed) + + def deterministic_reconstruction(self, x): + (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) + return y + + def update(self, instances): + """ + Update the L{Model} using one training instance. + @param instances: A list of dict from feature index to (non-zero) value. + @todo: Should assert that nonzero_indices and zero_indices + are correct (i.e. are truly nonzero/zero). + @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing? 
+ @todo: Decay the biases too? + """ + minibatch = len(instances) + x = pylearn.sparse_instance.to_vector(instances, self.input_dimension) + + (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) +# print +# print "instance:", instance +# print "x:", x +# print "OLD y:", y + print "OLD total loss:", loss +# print "gw1:", gw1 +# print "gb1:", gb1 +# print "gw2:", gw2 +# print "gb2:", gb2 + + self.parameters.w1 *= (1 - self.weight_decay) + self.parameters.w2 *= (1 - self.weight_decay) + + # SGD update + self.parameters.w1 -= self.learning_rate * gw1 / minibatch + self.parameters.b1 -= self.learning_rate * gb1 / minibatch + self.parameters.w2 -= self.learning_rate * gw2 / minibatch + self.parameters.b2 -= self.learning_rate * gb2 / minibatch + +# # Recompute the loss, to make sure it's descreasing +# (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) +## print "NEW y:", y +# print "NEW total loss:", loss +## print "h:", h +## print self.parameters diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/simple_autoassociator/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/simple_autoassociator/parameters.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,36 @@ +""" +Parameters (weights) used by the L{Model}. +""" + +import numpy + +class Parameters: + """ + Parameters used by the L{Model}. + """ + def __init__(self, input_dimension, hidden_dimension, randomly_initialize, random_seed): + """ + Initialize L{Model} parameters. + @param randomly_initialize: If True, then randomly initialize + according to the given seed. If False, then just use zeroes. + """ + if randomly_initialize: + numpy.random.seed(random_seed) + self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension + self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) + #self.b2 = numpy.array([10, 0, 0, -10]) + else: + self.w1 = numpy.zeros((input_dimension, hidden_dimension)) + self.w2 = numpy.zeros((hidden_dimension, input_dimension)) + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) + + def __str__(self): + s = "" + s += "w1: %s\n" % self.w1 + s += "b1: %s\n" % self.b1 + s += "w2: %s\n" % self.w2 + s += "b2: %s\n" % self.b2 + return s diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_instance.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_instance.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,22 @@ +""" +Sparse instances. +Each instance is represented as dict with key dimension. +Dimensions not present in the dict have value 0. +""" + +from numpy import zeros + +def to_vector(instances, dimensions): + """ + Convert sparse instances to vectors. + @type instances: list of sparse instances + @param dimensions: The number of dimensions in each instance. + @rtype: numpy matrix (instances x dimensions) + @todo: Allow this function to convert SINGLE instances (not lists). 
+ """ + v = zeros((len(instances), dimensions)) + l = len(instances) + for i in range(l): + for idx in instances[i].keys(): + v[i][idx] = instances[i][idx] + return v diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_random_autoassociator/README.txt Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,1 @@ +Since simple_aa doesn't work, this probably doesn't either. diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/globals.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_random_autoassociator/globals.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,13 @@ +""" +Global variables. +""" + +INPUT_DIMENSION = 1000 +HIDDEN_DIMENSION = 20 +LEARNING_RATE = 0.1 +LR = LEARNING_RATE +SEED = 666 +ZERO_SAMPLE_SIZE = 50 +#ZERO_SAMPLE_SIZE = 250 +MARGIN = 0.25 +#MARGIN = 0.0 diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/graph.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_random_autoassociator/graph.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,42 @@ +""" +Theano graph for an autoassociator for sparse inputs, which will be trained +using Ronan Collobert + Jason Weston's sampling trick (2008). +@todo: Make nearly everything private. +""" + +from globals import MARGIN + +from pylearn.nnet_ops import sigmoid, binary_crossentropy +from theano import tensor as t +from theano.tensor import dot +xnonzero = t.dvector() +w1nonzero = t.dmatrix() +b1 = t.dvector() +w2nonzero = t.dmatrix() +w2zero = t.dmatrix() +b2nonzero = t.dvector() +b2zero = t.dvector() +h = sigmoid(dot(xnonzero, w1nonzero) + b1) +ynonzero = sigmoid(dot(h, w2nonzero) + b2nonzero) +yzero = sigmoid(dot(h, w2zero) + b2zero) + +# May want to weight loss wrt nonzero value? e.g. MARGIN violation for +# 0.1 nonzero is not as bad as MARGIN violation for 0.2 nonzero. +def hingeloss(MARGIN): + return -MARGIN * (MARGIN < 0) +nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN) +zeroloss = hingeloss(-t.max(-(ynonzero)) - yzero - MARGIN) +# xnonzero sensitive loss: +#nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN - xnonzero) +#zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN) +loss = t.sum(nonzeroloss) + t.sum(zeroloss) + +#loss = t.sum(binary_crossentropy(ynonzero, xnonzero)) + t.sum(binary_crossentropy(yzero, t.constant(0))) + +(gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero]) + +import theano.compile + +inputs = [xnonzero, w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero] +outputs = [ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero] +trainfn = theano.compile.function(inputs, outputs) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/main.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_random_autoassociator/main.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,48 @@ +#!/usr/bin/python +""" + An autoassociator for sparse inputs, using Ronan Collobert + Jason + Weston's sampling trick (2008). + + The learned model is:: + h = sigmoid(dot(x, w1) + b1) + y = sigmoid(dot(h, w2) + b2) + + We assume that most of the inputs are zero, and hence that + we can separate x into xnonzero, x's nonzero components, and + xzero, a sample of the zeros. 
We sample---randomly without + replacement---ZERO_SAMPLE_SIZE zero columns from x. + + The desideratum is that every nonzero entry is separated from every + zero entry by margin at least MARGIN. + For each ynonzero, we want it to exceed max(yzero) by at least MARGIN. + For each yzero, we want it to be exceed by min(ynonzero) by at least MARGIN. + The loss is a hinge loss (linear). The loss is irrespective of the + xnonzero magnitude (this may be a limitation). Hence, all nonzeroes + are equally important to exceed the maximum yzero. + + (Alternately, there is a commented out binary xent loss.) + + LIMITATIONS: + - Only does pure stochastic gradient (batchsize = 1). + - Loss is irrespective of the xnonzero magnitude. + - We will always use all nonzero entries, even if the training + instance is very non-sparse. +""" + + +import numpy + +nonzero_instances = [] +nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) +nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) +nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) + +import model +model = model.Model() + +for i in xrange(100000): + # Select an instance + instance = nonzero_instances[i % len(nonzero_instances)] + + # SGD update over instance + model.update(instance) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/model.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_random_autoassociator/model.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,76 @@ +""" +The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason +Weston's sampling trick (2008). +""" + +from graph import trainfn +import parameters + +import globals +from globals import LR + +import numpy +import random +random.seed(globals.SEED) + +def _select_indices(instance): + """ + Choose nonzero and zero indices (feature columns) of the instance. + We select B{all} nonzero indices. + We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly, + without replacement. + @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter + an endless loop. + @return: (nonzero_indices, zero_indices) + """ + # Get the nonzero indices + nonzero_indices = instance.keys() + nonzero_indices.sort() + + # Get the zero indices + # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop. + zero_indices = [] + while len(zero_indices) < globals.ZERO_SAMPLE_SIZE: + idx = random.randint(0, globals.INPUT_DIMENSION - 1) + if idx in nonzero_indices or idx in zero_indices: continue + zero_indices.append(idx) + zero_indices.sort() + + return (nonzero_indices, zero_indices) + +class Model: + def __init__(self): + self.parameters = parameters.Parameters(randomly_initialize=True) + + def update(self, instance): + """ + Update the L{Model} using one training instance. + @param instance: A dict from feature index to (non-zero) value. + @todo: Should assert that nonzero_indices and zero_indices + are correct (i.e. are truly nonzero/zero). + """ + (nonzero_indices, zero_indices) = _select_indices(instance) + # No update if there aren't any non-zeros. 
+ if len(nonzero_indices) == 0: return + xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices]) + print + print "xnonzero:", xnonzero + + (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) + print "OLD ynonzero:", ynonzero + print "OLD yzero:", yzero + print "OLD total loss:", loss + + # SGD update + self.parameters.w1[nonzero_indices, :] -= LR * gw1nonzero + self.parameters.b1 -= LR * gb1 + self.parameters.w2[:, nonzero_indices] -= LR * gw2nonzero + self.parameters.w2[:, zero_indices] -= LR * gw2zero + self.parameters.b2[nonzero_indices] -= LR * gb2nonzero + self.parameters.b2[zero_indices] -= LR * gb2zero + + # Recompute the loss, to make sure it's descreasing + (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) + print "NEW ynonzero:", ynonzero + print "NEW yzero:", yzero + print "NEW total loss:", loss diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/sparse_random_autoassociator/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/sparse_random_autoassociator/parameters.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,28 @@ +""" +Parameters (weights) used by the L{Model}. +""" + +import numpy +import globals + +class Parameters: + """ + Parameters used by the L{Model}. + """ + def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED): + """ + Initialize L{Model} parameters. + @param randomly_initialize: If True, then randomly initialize + according to the given seed. If False, then just use zeroes. 
+ """ + if randomly_initialize: + numpy.random.seed(seed) + self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension + self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) + else: + self.w1 = numpy.zeros((input_dimension, hidden_dimension)) + self.w2 = numpy.zeros((hidden_dimension, input_dimension)) + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/statscollector.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/statscollector.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,127 @@ + +# Here is how I see stats collectors: + +def my_stats(graph): + graph.mse=examplewise_mean(square_norm(graph.residue)) + graph.training_loss=graph.regularizer+examplewise_sum(graph.nll) + return [graph.mse,graph.training_loss] + + +# def my_stats(residue,nll,regularizer): +# mse=examplewise_mean(square_norm(residue)) +# training_loss=regularizer+examplewise_sum(nll) +# set_names(locals()) +# return ((residue,nll),(regularizer),(),(mse,training_loss)) +# my_stats_collector = make_stats_collector(my_stats) +# +# where make_stats_collector calls my_stats(examplewise_fields, attributes) to +# construct its update function, and figure out what are the input fields (here "residue" +# and "nll") and input attributes (here "regularizer") it needs, and the output +# attributes that it computes (here "mse" and "training_loss"). Remember that +# fields are examplewise quantities, but attributes are not, in my jargon. +# In the above example, I am highlighting that some operations done in my_stats +# are examplewise and some are not. I am hoping that theano Ops can do these +# kinds of internal side-effect operations (and proper initialization of these hidden +# variables). I expect that a StatsCollector (returned by make_stats_collector) +# knows the following methods: +# stats_collector.input_fieldnames +# stats_collector.input_attribute_names +# stats_collector.output_attribute_names +# stats_collector.update(mini_dataset) +# stats_collector['mse'] +# where mini_dataset has the input_fieldnames() as fields and the input_attribute_names() +# as attributes, and in the resulting dataset the output_attribute_names() are set to the +# proper numeric values. 
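# --- Editor's sketch (not part of the patch) ---------------------------------
# The comment block above sketches the intended StatsCollector interface:
# declare which example-wise fields and which dataset-level attributes are
# consumed, call update() on a (mini)dataset, then read the computed attributes
# by name.  A toy, Theano-free object with that shape, using the my_stats
# example from the top of the file (mse and training_loss); the dict-based
# minibatch format is an illustrative assumption:

class ToyStatsCollector(object):
    input_field_names = ('residue', 'nll')        # example-wise quantities
    input_attribute_names = ('regularizer',)      # dataset-level quantities
    output_attribute_names = ('mse', 'training_loss')

    def update(self, minibatch):
        residue = minibatch['residue']            # list of per-example vectors
        nll = minibatch['nll']                    # list of per-example scalars
        sq_norms = [sum(r_i * r_i for r_i in r) for r in residue]
        self._out = {
            'mse': sum(sq_norms) / float(len(sq_norms)),
            'training_loss': minibatch['regularizer'] + sum(nll),
        }

    def __getitem__(self, name):
        return self._out[name]

# sc = ToyStatsCollector()
# sc.update({'residue': [[0.1, -0.2], [0.3, 0.0]], 'nll': [0.7, 0.4], 'regularizer': 0.01})
# sc['mse'], sc['training_loss']
# ------------------------------------------------------------------------------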
+ + + +import theano +from theano import tensor as t +from Learner import Learner +from lookup_list import LookupList + +class StatsCollectorModel(AttributesHolder): + def __init__(self,stats_collector): + self.stats_collector = stats_collector + self.outputs = LookupList(stats_collector.output_names,[None for name in stats_collector.output_names]) + # the statistics get initialized here + self.update_function = theano.function(input_attributes+input_fields,output_attributes+output_fields,linker="c|py") + for name,value in self.outputs.items(): + self.__setattribute__(name,value) + def update(self,dataset): + input_fields = dataset.fields()(self.stats_collector.input_field_names) + input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names) + self.outputs._values = self.update_function(input_attributes+input_fields) + for name,value in self.outputs.items(): + self.__setattribute__(name,value) + def __call__(self): + return self.outputs + def attributeNames(self): + return self.outputs.keys() + +class StatsCollector(AttributesHolder): + + def __init__(self,input_attributes, input_fields, outputs): + self.input_attributes = input_attributes + self.input_fields = input_fields + self.outputs = outputs + self.input_attribute_names = [v.name for v in input_attributes] + self.input_field_names = [v.name for v in input_fields] + self.output_names = [v.name for v in output_attributes] + + def __call__(self,dataset=None): + model = StatsCollectorModel(self) + if dataset: + self.update(dataset) + return model + +if __name__ == '__main__': + def my_statscollector(): + regularizer = t.scalar() + nll = t.matrix() + class_error = t.matrix() + total_loss = regularizer+t.examplewise_sum(nll) + avg_nll = t.examplewise_mean(nll) + avg_class_error = t.examplewise_mean(class_error) + for name,val in locals().items(): val.name = name + return StatsCollector([regularizer],[nll,class_error],[total_loss,avg_nll,avg_class_error]) + + + + +# OLD DESIGN: +# +# class StatsCollector(object): +# """A StatsCollector object is used to record performance statistics during training +# or testing of a learner. It can be configured to measure different things and +# accumulate the appropriate statistics. From these statistics it can be interrogated +# to obtain performance measures of interest (such as maxima, minima, mean, standard +# deviation, standard error, etc.). Optionally, the observations can be weighted +# (yielded weighted mean, weighted variance, etc., where applicable). The statistics +# that are desired can be specified among a list supported by the StatsCollector +# class or subclass. When some statistics are requested, others become automatically +# available (e.g., sum or mean).""" +# +# default_statistics = [mean,standard_deviation,min,max] +# +# __init__(self,n_quantities_observed, statistics=default_statistics): +# self.n_quantities_observed=n_quantities_observed +# +# clear(self): +# raise NotImplementedError +# +# update(self,observations): +# """The observations is a numpy vector of length n_quantities_observed. Some +# entries can be 'missing' (with a NaN entry) and will not be counted in the +# statistics.""" +# raise NotImplementedError +# +# __getattr__(self, statistic) +# """Return a particular statistic, which may be inferred from the collected statistics. 
+# The argument is a string naming that statistic.""" + + + + + + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/sandbox/test_speed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/test_speed.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,79 @@ +import numpy +from pylearn.datasets import * +from misc import * +def test_speed(array, ds): + print "test_speed", ds.__class__ + + mat = numpy.random.rand(400,100) + + @print_timing + def f_array_full(a): + a+1 + @print_timing + def f_array_index(a): + for id in range(a.shape[0]): +# pass + a[id]+1 +# a[id]*mat + @print_timing + def f_array_iter(a): + for r in a: +# pass + r+1 +# r*mat + @print_timing + def f_ds_index(ds): + for id in range(len(ds)): +# pass + ds[id][0]+1 +# ds[id][0]*mat + @print_timing + def f_ds_iter(ds): + for ex in ds: +# pass + ex[0]+1 +# a[0]*mat + @print_timing + def f_ds_mb1(ds,mb_size): + for exs in ds.minibatches(minibatch_size = mb_size): + for ex in exs: +# pass + ex[0]+1 +# ex[0]*mat + @print_timing + def f_ds_mb2(ds,mb_size): + for exs in ds.minibatches(minibatch_size = mb_size): +# pass + exs[0]+1 +# ex[0]*mat + + f_array_full(array) + f_array_index(array) + f_array_iter(array) + + f_ds_index(ds) + f_ds_iter(ds) + + f_ds_mb1(ds,10) + f_ds_mb1(ds,100) + f_ds_mb1(ds,1000) + f_ds_mb1(ds,10000) + f_ds_mb2(ds,10) + f_ds_mb2(ds,100) + f_ds_mb2(ds,1000) + f_ds_mb2(ds,10000) + +if __name__=='__main__': + a2 = numpy.random.rand(100000,400) + ds1 = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)}) + test_speed(a2,ds1) + a1 = numpy.random.rand(100000,40) + ds4 = ArrayDataSet(a1,LookupList(["f"+str(x)for x in range(a1.shape[1])], + range(a1.shape[1]))) + test_speed(a2,ds4) + ds2=CachedDataSet(ds1,cache_all_upon_construction=False) + test_speed(a2,ds2) + ds3=CachedDataSet(ds1,cache_all_upon_construction=True) + test_speed(a2,ds3) + del a2,ds1,ds2,ds3 + diff -r 27b1344a57b1 -r 8fff4bc26f4c pylearn/version.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/version.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,292 @@ +import subprocess as _subprocess +import imp as _imp +import sys +import os + + +_cache = dict() + +def src_version(module_name): + """Return compact identifier of module code. + + @return: compact identifier of module code. + @rtype: string + + @note: This function tries to establish that the source files and the repo + are syncronized. It raises an Exception if there are un-tracked '.py' + files, or if there are un-committed modifications. This implementation uses + "hg id" to establish this. The code returned by "hg id" is not affected by + hg pull, but pulling might remove the " tip" string which might have + appeared. This implementation ignores the " tip" information, and only + uses the code. + + @note: This implementation is assumes that the import directory is under + version control by mercurial. + + """ + + if module_name not in _cache: + + try : + location = _imp.find_module(module_name)[1] + except ImportError: + _cache[module_name] = None + return None + #print 'location:', location + isdir = False + if os.path.isdir(location) : + isdir = True + elif os.path.isfile(location) : + isdir = False + else : + # SEEMS THIS CASE EXIST, FOR WEIRD BUILTIN FUNCTIONS + #print location,": it's 'not a dir, it's not a file, it's superman!" + #raise Exception('Unknown location or file type') + _cache[module_name] = None + return None + + + # we're dealing with a dir + if isdir : + + # under hg? 
+ if not os.path.exists( os.path.join( location , '.hg') ) : + _cache[module_name] = None + return None + + status = _subprocess.Popen(('hg','st'),cwd=location,stdout=_subprocess.PIPE).communicate()[0] + #print 'status =', status + #TODO: check that the process return code is 0 (ticket #45) + + #status_codes = [line[0] for line in if line and line[0] != '?'] + for line in status.split('\n'): + if not line: continue + if line[0] != '?': + raise Exception('Uncommitted modification to "%s" in %s (%s)' + %(line[2:], __name__,location)) + if line[0] == '?' and line[-3:] == '.py': + raise Exception('Untracked file "%s" in %s (%s)' + %(line[2:], __name__, location)) + + hg_id = _subprocess.Popen(('hg','id'),cwd=location,stdout=_subprocess.PIPE).communicate()[0] + + # This asserts my understanding of hg id return values + # There is mention in the doc that it might return two parent hash codes + # but I've never seen it, and I dont' know what it means or how it is + # formatted. + tokens = hg_id.split(' ') + assert len(tokens) <= 2 + assert len(tokens) >= 1 + assert tokens[0][-1] != '+' # the trailing + indicates uncommitted changes + if len(tokens) == 2: + assert tokens[1] == 'tip\n' + + _cache[module_name] = tokens[0] + + # we're dealing with a file + if not isdir : + + folder = os.path.split( os.path.abspath(location) )[0] + # under hg? + if not os.path.exists( os.path.join( folder , '.hg') ) : + _cache[module_name] = None + return None + + status = _subprocess.Popen(('hg','st',location),cwd=folder,stdout=_subprocess.PIPE).communicate()[0] + #print 'status =', status + + #status_codes = [line[0] for line in if line and line[0] != '?'] + for line in status.split('\n'): + if not line: continue + if line[0] != '?': + raise Exception('Uncommitted modification to "%s" in %s (%s)' + %(line[2:], location,folder)) + if line[0] == '?' and line[-3:] == '.py': + raise Exception('Untracked file "%s" in %s (%s)' + %(line[2:], location, folder)) + + hg_id = _subprocess.Popen(('hg','id'),cwd=folder,stdout=_subprocess.PIPE).communicate()[0] + + # This asserts my understanding of hg id return values + # There is mention in the doc that it might return two parent hash codes + # but I've never seen it, and I dont' know what it means or how it is + # formatted. + tokens = hg_id.split(' ') + assert len(tokens) <= 2 + assert len(tokens) >= 1 + if tokens[0][-1] == '+' : + tokens[0] = tokens[0][:-1] # the change was not on this file + if len(tokens) == 2: + assert tokens[1] == 'tip\n' + + _cache[module_name] = tokens[0] + + + return _cache[module_name] + +_unknown_version = 'unknown version' + +def hg_version(dirname, filenames=None): + """Return current changeset of directory I{dirname}. + + @type filename: list of str (or default: None) + @param filename: if specified, we ignore modifications to other files. 
+ + @rtype: tuple (last changeset, modified) + + """ + if type(filenames) not in (list, tuple, type(None)): + raise TypeError(filenames) + + #may raise exception, for example if hg is not visible via PATH + status_proc = _subprocess.Popen(('hg','st'), cwd=dirname, + stdout=_subprocess.PIPE, stderr=_subprocess.PIPE) + status = status_proc.communicate()[0] #read stdout into buffer + if status_proc.returncode != 0: + raise OSError('hg returned %i, maybe %s is not under hg control?', + (status_proc.returncode, dirname)) + + #may raise exception, for example if hg is not visible via PATH + id_proc = _subprocess.Popen(('hg','id', '-i'), cwd=dirname, + stdout=_subprocess.PIPE, stderr=_subprocess.PIPE) + id_stdout = id_proc.communicate()[0] + if id_proc.returncode != 0: + raise OSError('hg returned %i, maybe %s is not under hg control?', + (id_proc.returncode, dirname)) + + care_about = (lambda some_file : True) if filenames is None \ + else (lambda some_file : some_file in filenames) + + # parse status codes for what we care about + care_about_mod = False + for line in status.split('\n'): + if not line: #empty lines happen + continue + line_file = line[2:] + if line[0] != '?' and care_about(line_file): + care_about_mod = True + #raise Exception('Uncommitted modification', + #os.path.join(dirname, line_file)) + if line[0] == '?' and line[-3:] == '.py': + print >> sys.stderr, 'WARNING: untracked file', os.path.join(dirname, line_file) + + # id_stdout is 12 hex digits followed by '+\n' or '\n' + # return the trailing '+' character only if there were changes to files that + # the caller cares about (named in filenames) + modified = (id_stdout[12] == '+') + assert len(id_stdout) in (13, 14) #sanity check + if modified and care_about_mod : + return id_stdout[:13] + else: + return id_stdout[:12] + +def _import_id_py_source(location): + try: + dirname = os.path.dirname(location[1]) + basename = os.path.basename(location[1]) + return hg_version(dirname, [basename]) + except OSError, e: + print >> sys.stderr, 'IGNORNING', e + return _unknown_version + ' PY_SOURCE' + +def _import_id_py_compiled(location): + #a .pyc file was found, but no corresponding .py + return _unknown_version + ' PYC_COMPILED' + +def _import_id_pkg_directory(location): + try: + return hg_version(location[1]) + except OSError, e: + print >> sys.stderr, 'IGNORNING', e + return _unknown_version + ' PKG_DIRECTORY' + +def _import_id(tag): + try : + location = _imp.find_module(tag) + except ImportError, e: #raise when tag is not found + return e #put this in the cache, import_id will raise it + + #the find_module was successful, location is valid + resource_type = location[2][2] + + if resource_type == _imp.PY_SOURCE: + return _import_id_py_source(location) + if resource_type == _imp.PY_COMPILED: + return _import_id_py_compiled(location) + if resource_type == _imp.C_EXTENSION: + raise NoteImplementedError + if resource_type == _imp.PY_RESOURCE: + raise NoteImplementedError + if resource_type == _imp.PKG_DIRECTORY: + return _import_id_pkg_directory(location) + if resource_type == _imp.C_BUILTIN: + raise NoteImplementedError + if resource_type == _imp.PY_FROZEN: + raise NoteImplementedError + + assert False #the list of resource types above should be exhaustive + +def import_id(tag): + """Return an identifier of the code imported by 'import '. + + @param tag: a module or file name + @type tag: string + + @rtype: string + @return: identifier of the code imported by 'import '. 
+ + This high-level function might do different things depending on, for + example, whether I{tag} identifies a file or a directory, or whether the + named entity is under some sort of version/revision control. + + Versions are sought in the following order: + 0. If I{tag} is 'python' then sys.version will be returned + 1. If I{tag} names a file or folder under revision control, this function + will attempt to guess which one, and return a string that identifies the + running code (a revision id, not the whole file!) + 2. If I{tag} names a module with a __version__ attribute, then that + attribute will be returned as a string. + 3. The string starting with 'unknown version' will be returned for other valid modules. + 4. An exception will be raise for non-existent modules. + + @note: This function may import the named entity in order to return a + __version__ module attribute. + + """ + if tag not in import_id.cache: + import_id.cache[tag] = _import_id(tag) + + #in the case of bad module names, we cached the ImportError exception + rval = import_id.cache[tag] + if isinstance(rval, Exception): + raise rval + return rval +import_id.cache = {'python':sys.version} + +def get_all_src_versions() : + """ + Get the version of all loaded module. + Calls src_version on all loaded modules. These modules are found + using sys.modules. + + Returns a dictionnary: name->version. + + @RETURN dict Dictionnary (module's name) -> (version) + @SEE src_version + """ + allmodules = sys.modules + d = dict() + for m in allmodules : + try: + d[m] = import_id(m) + except: + pass + return d + + +if __name__ == "__main__" : + + if len(sys.argv) == 2 : + print 'testing on', sys.argv[1] + print import_id(sys.argv[1]) + diff -r 27b1344a57b1 -r 8fff4bc26f4c random_transformation.py --- a/random_transformation.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,132 +0,0 @@ -""" -New L{Op}s that aren't in core theano -""" - -from theano import sparse -from theano import tensor -from theano import scalar -from theano.gof import op - -from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result - -import scipy.sparse - -import numpy - -class RowRandomTransformation(op.Op): - """ - Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we - multiply it by a deterministic random matrix of shape (dimensions, - length) to obtain random transformation output of shape (exmpls, - length). - - Each element of the deterministic random matrix is selected uniformly - from [-1, +1). - @todo: Use another random distribution? - - @note: This function should be written such that if length is - increased, we obtain the same results (except longer). Similarly, - the rows should be able to be permuted and get the same result in - the same fashion. - - @todo: This may be slow? - @todo: Rewrite for dense matrices too? - @todo: Is there any way to verify the convention that each row is - an example? Should I rename the variables in the code to make the - semantics more explicit? - @todo: AUTOTEST: Autotest that dense and spare versions of this are identical. - @todo: Rename? Is Row the correct name? Maybe column-wise? - - @type x: L{scipy.sparse.spmatrix} - @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions) - @type length: int - @param length: The number of transformations of C{x} to be performed. - @param initial_seed: Initial seed for the RNG. 
- @rtype: L{numpy.ndarray} - @return: Array with C{length} random transformations, with shape (exmpls, length) - """ - - import random - """ - RNG used for random transformations. - Does not share state with rest of program. - @todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic. - """ - _trng = random.Random() - - def __init__(self, x, length, initial_seed=0, **kwargs): - """ - @todo: Which broadcastable values should I use? - """ - assert 0 # Needs to be updated to Olivier's new Op creation approach - op.Op.__init__(self, **kwargs) - x = sparse.as_sparse(x) - self.initial_seed = initial_seed - self.length = length - self.inputs = [x] - self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])] -# self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])] - - def _random_matrix_value(self, row, col, rows): - """ - From a deterministic random matrix, find one element. - @param row: The row of the element to be read. - @param col: The column of the element to be read. - @param row: The number of rows in the matrix. - @type row: int - @type col: int - @type rows: int - @note: This function is designed such that if we extend - the number of columns in the random matrix, the values of - the earlier entries is unchanged. - @todo: Make this static - """ - # Choose the random entry at (l, c) - rngidx = col * rows + row - # Set the random number state for this random entry - # Note: This may be slow - self._trng.seed(rngidx + self.initial_seed) - - # Determine the value for this entry - val = self._trng.uniform(-1, +1) -# print "Exmpl #%d, dimension #%d => Random projection #%d has idx %d (+ seed %d) and value %f" % (r, c, j, rngidx, self.initial_seed, val) - return val - - def impl(self, xorig): - assert _is_sparse(xorig) - assert len(xorig.shape) == 2 - # Since conversions to and from the COO format are quite fast, you - # can use this approach to efficiently implement lots computations - # on sparse matrices. - x = xorig.tocoo() - (rows, cols) = x.shape - tot = rows * cols - out = numpy.zeros((rows, self.length)) -# print "l = %d" % self.length -# print "x.getnnz() = %d" % x.getnnz() - all = zip(x.col, x.row, x.data) - all.sort() # TODO: Maybe this is very slow? - lastc = None - lastl = None - lastval = None - for l in range(self.length): - for (c, r, data) in all: - assert c < cols - assert r < rows - if not c == lastc or not l == lastl: - lastc = c - lastl = l - lastval = self._random_matrix_value(c, l, cols) - val = lastval -# val = self._random_matrix_value(c, l, cols) -# val = self._trng.uniform(-1, +1) -# val = 1.0 - out[r][l] += val * data - return out - def __copy__(self): - return self.__class__(self.inputs[0], self.length, self.initial_seed) - def clone_with_new_inputs(self, *new_inputs): - return self.__class__(new_inputs[0], self.length, self.initial_seed) - def desc(self, *new_inputs): - return (self.__class__, self.length, self.initial_seed) -row_random_transformation = RowRandomTransformation() diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/README.txt --- a/sandbox/README.txt Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -Stuff in the sandbox may be very broken and/or in flux. 
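# --- Editor's sketch (not part of the patch) ---------------------------------
# RowRandomTransformation (deleted above, re-added under
# pylearn/sandbox/random_transformation.py earlier in this changeset) never
# materializes its projection matrix: entry (dim, proj) is drawn by re-seeding
# a private RNG with proj * n_dims + dim + initial_seed and taking one
# uniform(-1, +1) sample, so extending the output length leaves earlier columns
# unchanged.  The trick in isolation, for a dense input (pure Python, no
# Theano; function names are local to this sketch):

import random

def random_matrix_entry(dim, proj, n_dims, initial_seed=0):
    rng = random.Random()
    rng.seed(proj * n_dims + dim + initial_seed)  # deterministic per (dim, proj)
    return rng.uniform(-1, +1)

def project_dense(x_rows, n_dims, length, initial_seed=0):
    # Multiply a list of dense rows (each of length n_dims) by the implicit
    # (n_dims x length) random matrix.
    out = []
    for row in x_rows:
        out.append([sum(row[d] * random_matrix_entry(d, l, n_dims, initial_seed)
                        for d in range(n_dims))
                    for l in range(length)])
    return out
# ------------------------------------------------------------------------------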
diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/denoising_aa.py --- a/sandbox/denoising_aa.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,224 +0,0 @@ -""" -A denoising auto-encoder - -@warning: You should use this interface. It is not complete and is not functional. -Instead, use:: - ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa -""" - -import theano -from theano.formula import * -from learner import * -from theano import tensor as t -from nnet_ops import * -import math -from misc import * -from misc_theano import * -from theano.tensor_random import binomial - -def hiding_corruption_formula(seed,average_fraction_hidden): - """ - Return a formula for the corruption process, in which a random - subset of the input numbers are hidden (mapped to 0). - - @param seed: seed of the random generator - @type seed: anything that numpy.random.RandomState accepts - - @param average_fraction_hidden: the probability with which each - input number is hidden (set to 0). - @type average_fraction_hidden: 0 <= real number <= 1 - """ - class HidingCorruptionFormula(Formulas): - x = t.matrix() - corrupted_x = x * binomial(seed,x,1,fraction_sampled) - - return HidingCorruptionFormula() - -def squash_affine_formula(squash_function=sigmoid): - """ - Simply does: squash_function(b + xW) - By convention prefix the parameters by _ - """ - class SquashAffineFormula(Formulas): - x = t.matrix() # of dimensions minibatch_size x n_inputs - _b = t.row() # of dimensions 1 x n_outputs - _W = t.matrix() # of dimensions n_inputs x n_outputs - a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs - y = squash_function(a) - return SquashAffineFormula() - -def gradient_descent_update_formula(): - class GradientDescentUpdateFormula(Formula): - param = t.matrix() - learning_rate = t.scalar() - cost = t.column() # cost of each example in a minibatch - param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost)) - return gradient_descent_update_formula() - -def probabilistic_classifier_loss_formula(): - class ProbabilisticClassifierLossFormula(Formulas): - a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output - target_class = t.ivector() # dimension (minibatch_size) - nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py - return ProbabilisticClassifierLossFormula() - -def binomial_cross_entropy_formula(): - class BinomialCrossEntropyFormula(Formulas): - a = t.matrix() # pre-sigmoid activations, minibatch_size x dim - p = sigmoid(a) # model prediction - q = t.matrix() # target binomial probabilities, minibatch_size x dim - # using the identity softplus(a) - softplus(-a) = a, - # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a) - nll = -t.sum(q*a - softplus(-a)) - # next line was missing... 
hope it's all correct above - return BinomialCrossEntropyFormula() - -def squash_affine_autoencoder_formula(hidden_squash=t.tanh, - reconstruction_squash=sigmoid, - share_weights=True, - reconstruction_nll_formula=binomial_cross_entropy_formula(), - update_formula=gradient_descent_update_formula): - if share_weights: - autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a') + \ - squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \ - reconstruction_nll_formula - else: - autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',_W='_W1') + \ - squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \ - reconstruction_nll_formula - autoencoder = autoencoder + [update_formula().rename(cost = 'nll', - param = p) - for p in autoencoder.get_all('_.*')] - return autoencoder - - -# @todo: try other corruption formulae. The above is the default one. -# not quite used in the ICML paper... (had a fixed number of 0s). - -class DenoisingAutoEncoder(LearningAlgorithm): - - def __init__(self,n_inputs,n_hidden_per_layer, - learning_rate=0.1, - max_n_epochs=100, - L1_regularizer=0, - init_range=1., - corruption_formula = hiding_corruption_formula(), - autoencoder = squash_affine_autoencoder_formula(), - minibatch_size=None,linker = "c|py"): - for name,val in locals().items(): - if val is not self: self.__setattribute__(name,val) - self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x') - - def __call__(self, training_set=None): - """ Allocate and optionnaly train a model - - @TODO enables passing in training and valid sets, instead of cutting one set in 80/20 - """ - model = DenoisingAutoEncoderModel(self) - if training_set: - print 'DenoisingAutoEncoder(): what do I do if training_set????' - # copied from old mlp_factory_approach: - if len(trainset) == sys.maxint: - raise NotImplementedError('Learning from infinite streams is not supported') - nval = int(self.validation_portion * len(trainset)) - nmin = len(trainset) - nval - assert nmin >= 0 - minset = trainset[:nmin] #real training set for minimizing loss - valset = trainset[nmin:] #validation set for early stopping - best = model - for stp in self.early_stopper(): - model.update( - minset.minibatches([input, target], minibatch_size=min(32, - len(trainset)))) - #print 'mlp.__call__(), we did an update' - if stp.set_score: - stp.score = model(valset, ['loss_01']) - if (stp.score < stp.best_score): - best = copy.copy(model) - model = best - # end of the copy from mlp_factory_approach - - return model - - - def compile(self, inputs, outputs): - return theano.function(inputs,outputs,unpack_single=False,linker=self.linker) - -class DenoisingAutoEncoderModel(LearnerModel): - def __init__(self,learning_algorithm,params): - self.learning_algorithm=learning_algorithm - self.params=params - v = learning_algorithm.v - self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs, - learning_algorithm.denoising_autoencoder_formula.outputs) - - def update(self, training_set, train_stats_collector=None): - - print 'dont update you crazy frog!' 
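# --- Editor's note (not part of the patch) ------------------------------------
# In binomial_cross_entropy_formula of the denoising_aa.py copy deleted just
# above, the comment uses the identity softplus(a) - softplus(-a) = a to state
# q*log(p) + (1-q)*log(1-p) = q*a - softplus(a) for p = sigmoid(a), yet the nll
# line sums q*a - softplus(-a); the comment's version is the one the algebra
# supports, so the code looks like a sign slip.  A quick numerical check of the
# identity (helper names are local to this sketch):

import numpy

def _softplus(a):
    return numpy.log1p(numpy.exp(a))

def _sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

a = numpy.array([-2.0, -0.3, 0.0, 0.7, 3.0])
q = numpy.array([0.1, 0.9, 0.5, 0.3, 0.8])
p = _sigmoid(a)
direct = q * numpy.log(p) + (1.0 - q) * numpy.log(1.0 - p)
rewritten = q * a - _softplus(a)
assert numpy.allclose(direct, rewritten)
# ------------------------------------------------------------------------------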
- -# old stuff - -# self._learning_rate = t.scalar('learning_rate') # this is the symbol -# self.L1_regularizer = L1_regularizer -# self._L1_regularizer = t.scalar('L1_regularizer') -# self._input = t.matrix('input') # n_examples x n_inputs -# self._W = t.matrix('W') -# self._b = t.row('b') -# self._c = t.row('b') -# self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W)) -# self._corrupted_input = corruption_process(self._input) -# self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T)) -# self._reconstruction_activations =self._c+t.dot(self._hidden,self._W) -# self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector) -# self._output_class = t.argmax(self._output,1) -# self._class_error = t.neq(self._output_class,self._target_vector) -# self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0] -# OnlineGradientTLearner.__init__(self) - -# def attributeNames(self): -# return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"] - -# def parameterAttributes(self): -# return ["b1","W1", "b2", "W2"] - -# def updateMinibatchInputFields(self): -# return ["input","target"] - -# def updateEndOutputAttributes(self): -# return ["regularization_term"] - -# def lossAttribute(self): -# return "minibatch_criterion" - -# def defaultOutputFields(self, input_fields): -# output_fields = ["output", "output_class",] -# if "target" in input_fields: -# output_fields += ["class_error", "nll"] -# return output_fields - -# def allocate(self,minibatch): -# minibatch_n_inputs = minibatch["input"].shape[1] -# if not self._n_inputs: -# self._n_inputs = minibatch_n_inputs -# self.b1 = numpy.zeros((1,self._n_hidden)) -# self.b2 = numpy.zeros((1,self._n_outputs)) -# self.forget() -# elif self._n_inputs!=minibatch_n_inputs: -# # if the input changes dimension on the fly, we resize and forget everything -# self.forget() - -# def forget(self): -# if self._n_inputs: -# r = self._init_range/math.sqrt(self._n_inputs) -# self.W1 = numpy.random.uniform(low=-r,high=r, -# size=(self._n_hidden,self._n_inputs)) -# r = self._init_range/math.sqrt(self._n_hidden) -# self.W2 = numpy.random.uniform(low=-r,high=r, -# size=(self._n_outputs,self._n_hidden)) -# self.b1[:]=0 -# self.b2[:]=0 -# self._n_epochs=0 - -# def isLastEpoch(self): -# self._n_epochs +=1 -# return self._n_epochs>=self._max_n_epochs diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/gradient_learner.py --- a/sandbox/gradient_learner.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ - -from learner import * -from tensor import * -import gradient -from compile import Function - -class GradientLearner(Learner): - """ - Base class for gradient-based optimization of a training criterion - that can consist in two parts, an additive part over examples, and - an example-independent part (usually called the regularizer). - The user provides a Theano formula that maps the fields of a minibatch (each being a tensor with the - same number of rows = minibatch size) and parameters to output fields (for the use function), one of which - must be a cost that is the training criterion to be minimized. Subclasses implement - a training strategy that uses the Theano formula to compute gradients and - to compute outputs in the update method. - The inputs, parameters, and outputs are lists of Theano tensors, - while the example_wise_cost and regularization_term are Theano tensors. 
- The user can specify a regularization coefficient that multiplies the regularization term. - The training algorithm looks for parameters that minimize - regularization_coefficient * regularization_term(parameters) + - sum_{inputs in training_set} example_wise_cost(inputs,parameters) - i.e. the regularization_term should not depend on the inputs, only on the parameters. - The learned function can map a subset of inputs to a subset of outputs (as long as the inputs subset - includes all the inputs required in the Theano expression for the selected outputs). - It is assumed that all the inputs are provided in the training set (as dataset fields - with the corresponding name), but not necessarily when using the learned function. - """ - def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0), - regularization_coefficient = astensor(1.0)): - self.inputs = inputs - self.outputs = outputs - self.parameters = parameters - self.example_wise_cost = example_wise_cost - self.regularization_term = regularization_term - self.regularization_coefficient = regularization_coefficient - self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters) - self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters) - if example_wise_cost not in outputs: - outputs.append(example_wise_cost) - if regularization_term not in outputs: - outputs.append(regularization_term) - self.example_wise_gradient_fn = Function(inputs + parameters, - [self.parameters_example_wise_gradient + self.parameters_regularization_gradient]) - self.use_functions = {frozenset([input.name for input in inputs]+[output.name for output in outputs]) - : Function(inputs, outputs)} - - def use(self,input_dataset,output_fields=None,copy_inputs=True): - # obtain the function that maps the desired inputs to desired outputs - input_fields = input_dataset.fieldNames() - # map names of input fields to Theano tensors in self.inputs - input_variables = ??? - if output_fields is None: output_fields = [output.name for output in outputs] - # handle special case of inputs that are directly copied into outputs - # map names of output fields to Theano tensors in self.outputs - output_variables = ??? - use_function_key = input_fields+output_fields - if not self.use_functions.has_key(use_function_key): - self.use_function[use_function_key]=Function(input_variables,output_variables) - use_function = self.use_functions[use_function_key] - # return a dataset that computes the outputs - return input_dataset.apply_function(use_function,input_fields,output_fields,copy_inputs,compute_now=True) - - -class StochasticGradientDescent(object): - def update_parameters(self): - -class StochasticGradientLearner(GradientLearner,StochasticGradientDescent): - def __init__(self,inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0), - regularization_coefficient = astensor(1.0),) - def update() diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/rbm/README.txt --- a/sandbox/rbm/README.txt Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -An RBM with binomial units trained with CD-1. -by Joseph Turian - -This seems to work fine. 
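The README above only states that the RBM has binomial units trained with CD-1; as an illustration, here is a minimal numpy sketch of a single CD-1 step (invented shapes and learning rate, independent of the deleted model.py, which additionally applies momentum and weight decay):

import numpy
rng = numpy.random.RandomState(0)

def sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

def sample(p):
    return (rng.rand(*p.shape) < p).astype(float)

n_visible, n_hidden, lr = 10, 6, 0.1
w = 0.01 * rng.randn(n_visible, n_hidden)
b = numpy.zeros(n_hidden)                        # hidden biases
c = numpy.zeros(n_visible)                       # visible biases

v0 = sample(rng.rand(5, n_visible))              # a toy minibatch of binary visibles
q0 = sigmoid(b + numpy.dot(v0, w))               # P(h=1 | v0): positive phase
h0 = sample(q0)
p1 = sigmoid(c + numpy.dot(h0, w.T))             # P(v=1 | h0)
v1 = sample(p1)                                  # one Gibbs step: the "1" in CD-1
q1 = sigmoid(b + numpy.dot(v1, w))               # negative phase

w += lr * (numpy.dot(v0.T, h0) - numpy.dot(v1.T, q1)) / len(v0)
b += lr * (h0 - q1).sum(axis=0) / len(v0)
c += lr * (v0 - v1).sum(axis=0) / len(v0)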
diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/rbm/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/rbm/main.py --- a/sandbox/rbm/main.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -#!/usr/bin/python -""" -Simple SGD RBM training. -(An example of how to use the model.) -""" - - -import numpy - -nonzero_instances = [] -#nonzero_instances.append({0: 1, 1: 1}) -#nonzero_instances.append({0: 1, 2: 1}) - -nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) -nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) -nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) - -import model -model = model.Model(input_dimension=10, hidden_dimension=6) - -for i in xrange(100000): - # Select an instance - instance = nonzero_instances[i % len(nonzero_instances)] - - # SGD update over instance - model.update([instance]) diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/rbm/model.py --- a/sandbox/rbm/model.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -""" -The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason -Weston's sampling trick (2008). -""" - -import parameters - -import numpy -from numpy import dot -import random - -import pylearn.nnet_ops -import pylearn.sparse_instance - -def sigmoid(v): - """ - @todo: Move to pylearn.more_numpy - @todo: Fix to avoid floating point overflow. - """ -# if x < -30.0: return 0.0 -# if x > 30.0: return 1.0 - return 1.0 / (1.0 + numpy.exp(-v)) - -def sample(v): - """ - @todo: Move to pylearn.more_numpy - """ - assert len(v.shape) == 2 - x = numpy.zeros(v.shape) - for j in range(v.shape[0]): - for i in range(v.shape[1]): - assert v[j][i] >= 0 and v[j][i] <= 1 - if random.random() < v[j][i]: x[j][i] = 1 - else: x[j][i] = 0 - return x - -def crossentropy(output, target): - """ - Compute the crossentropy of binary output wrt binary target. - @note: We do not sum, crossentropy is computed by component. - @todo: Rewrite as a scalar, and then broadcast to tensor. - @todo: Move to pylearn.more_numpy - @todo: Fix to avoid floating point overflow. - """ - return -(target * numpy.log(output) + (1 - target) * numpy.log(1 - output)) - - -class Model: - """ - @todo: input dimensions should be stored here! not as a global. - """ - def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, momentum = 0.9, weight_decay = 0.0002, random_seed = 666): - self.input_dimension = input_dimension - self.hidden_dimension = hidden_dimension - self.learning_rate = learning_rate - self.momentum = momentum - self.weight_decay = weight_decay - self.random_seed = random_seed - - random.seed(random_seed) - - self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed) - self.prev_dw = 0 - self.prev_db = 0 - self.prev_dc = 0 - - def deterministic_reconstruction(self, v0): - """ - One up-down cycle, but a mean-field approximation (no sampling). - """ - q = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) - p = sigmoid(self.parameters.c + dot(q, self.parameters.w.T)) - return p - - def deterministic_reconstruction_error(self, v0): - """ - @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)). - """ - return crossentropy(self.deterministic_reconstruction(v0), v0) - - def update(self, instances): - """ - Update the L{Model} using one training instance. - @param instance: A dict from feature index to (non-zero) value. 
- @todo: Should assert that nonzero_indices and zero_indices - are correct (i.e. are truly nonzero/zero). - @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing? - @todo: Decay the biases too? - """ - minibatch = len(instances) - v0 = pylearn.sparse_instance.to_vector(instances, self.input_dimension) - print "old XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch - q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) - h0 = sample(q0) - p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) - v1 = sample(p0) - q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) - - dw = self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + self.momentum * self.prev_dw - db = self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch + self.momentum * self.prev_db - dc = self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch + self.momentum * self.prev_dc - - self.parameters.w *= (1 - self.weight_decay) - - self.parameters.w += dw - self.parameters.b += db - self.parameters.c += dc - - self.last_dw = dw - self.last_db = db - self.last_dc = dc - - print "new XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch - -# print -# print "v[0]:", v0 -# print "Q(h[0][i] = 1 | v[0]):", q0 -# print "h[0]:", h0 -# print "P(v[1][j] = 1 | h[0]):", p0 -# print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0)) -# print "v[1]:", v1 -# print "Q(h[1][i] = 1 | v[1]):", q1 -# -# print -# print v0.T.shape -# print h0.shape -# print dot(v0.T, h0).shape -# print self.parameters.w.shape -# self.parameters.w += self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch -# print -# print h0.shape -# print q1.shape -# print self.parameters.b.shape -# self.parameters.b += self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch -# print v0.shape, v1.shape -# print -# print self.parameters.c.shape -# self.parameters.c += self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch -# print self.parameters diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/rbm/parameters.py --- a/sandbox/rbm/parameters.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -""" -Parameters (weights) used by the L{Model}. -""" - -import numpy - -class Parameters: - """ - Parameters used by the L{Model}. - """ - def __init__(self, input_dimension, hidden_dimension, randomly_initialize, random_seed): - """ - Initialize L{Model} parameters. - @param randomly_initialize: If True, then randomly initialize - according to the given random_seed. If False, then just use zeroes. - """ - if randomly_initialize: - numpy.random.random_seed(random_seed) - self.w = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension - self.b = numpy.zeros((1, hidden_dimension)) - self.c = numpy.zeros((1, input_dimension)) - else: - self.w = numpy.zeros((input_dimension, hidden_dimension)) - self.b = numpy.zeros((1, hidden_dimension)) - self.c = numpy.zeros((1, input_dimension)) - - def __str__(self): - s = "" - s += "w: %s\n" % self.w - s += "b: %s\n" % self.b - s += "c: %s\n" % self.c - return s diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/simple_autoassociator/README.txt --- a/sandbox/simple_autoassociator/README.txt Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -This seems to work. - -@todo: - * Add momentum. - * Add learning rate decay schedule. 
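The RBM update earlier in this hunk folds momentum and weight decay into the raw CD-1 gradient (note that it stores the step in self.last_dw while reading self.prev_dw, so the momentum term apparently never changes after initialization), and the autoassociator README just above lists momentum as a remaining @todo. A generic, hypothetical sketch of that update pattern:

import numpy

learning_rate, momentum, weight_decay = 0.1, 0.9, 0.0002

def sgd_step(w, grad, prev_dw):
    dw = learning_rate * grad + momentum * prev_dw   # velocity: step plus a fraction of the last step
    w = w * (1.0 - weight_decay)                     # shrink the weights before applying the step
    return w + dw, dw                                # dw must be fed back in as prev_dw next call

w = numpy.zeros((3, 2))
prev_dw = numpy.zeros_like(w)
for _ in range(5):
    grad = numpy.ones_like(w)                        # stand-in gradient
    w, prev_dw = sgd_step(w, grad, prev_dw)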
diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/simple_autoassociator/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/simple_autoassociator/graph.py --- a/sandbox/simple_autoassociator/graph.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -""" -Theano graph for a simple autoassociator. -@todo: Make nearly everything private. -""" - -from pylearn.nnet_ops import sigmoid, binary_crossentropy -from theano import tensor as t -from theano.tensor import dot -x = t.dmatrix() -w1 = t.dmatrix() -b1 = t.dvector() -w2 = t.dmatrix() -b2 = t.dvector() -h = sigmoid(dot(x, w1) + b1) -y = sigmoid(dot(h, w2) + b2) - -loss_unsummed = binary_crossentropy(y, x) -loss = t.sum(loss_unsummed) - -(gw1, gb1, gw2, gb2) = t.grad(loss, [w1, b1, w2, b2]) - -import theano.compile - -inputs = [x, w1, b1, w2, b2] -outputs = [y, h, loss, gw1, gb1, gw2, gb2] -trainfn = theano.compile.function(inputs, outputs) diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/simple_autoassociator/main.py --- a/sandbox/simple_autoassociator/main.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/python -""" - A simple autoassociator. - - The learned model is:: - h = sigmoid(dot(x, w1) + b1) - y = sigmoid(dot(h, w2) + b2) - - Binary xent loss. -""" - - -import numpy - -nonzero_instances = [] -nonzero_instances.append({0: 1, 1: 1}) -nonzero_instances.append({0: 1, 2: 1}) - -#nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) -#nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) -##nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) - -import model -model = model.Model(input_dimension=10, hidden_dimension=4) - -for i in xrange(100000): -# # Select an instance -# instance = nonzero_instances[i % len(nonzero_instances)] - - # Update over instance - model.update(nonzero_instances) diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/simple_autoassociator/model.py --- a/sandbox/simple_autoassociator/model.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -""" -The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason -Weston's sampling trick (2008). -""" - -from graph import trainfn -import parameters - -import numpy -import random - -import pylearn.sparse_instance - -class Model: - """ - @todo: Add momentum. - @todo: Add learning rate decay schedule. - """ - def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, weight_decay = 0.0002, random_seed = 666): - self.input_dimension = input_dimension - self.hidden_dimension = hidden_dimension - self.learning_rate = learning_rate - self.weight_decay = weight_decay - self.random_seed = random_seed - - random.seed(random_seed) - - self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed) - - def deterministic_reconstruction(self, x): - (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) - return y - - def update(self, instances): - """ - Update the L{Model} using one training instance. - @param instances: A list of dict from feature index to (non-zero) value. - @todo: Should assert that nonzero_indices and zero_indices - are correct (i.e. are truly nonzero/zero). - @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing? - @todo: Decay the biases too? 
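graph.py above builds this model symbolically with Theano; for reference, the same forward pass and summed binary cross-entropy written in plain numpy (toy shapes, illustrative only):

import numpy

def sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

rng = numpy.random.RandomState(0)
x = rng.rand(5, 10)                              # minibatch x input_dimension
w1 = 0.1 * rng.randn(10, 4)
b1 = numpy.zeros(4)
w2 = 0.1 * rng.randn(4, 10)
b2 = numpy.zeros(10)

h = sigmoid(numpy.dot(x, w1) + b1)               # hidden code
y = sigmoid(numpy.dot(h, w2) + b2)               # reconstruction
loss = -numpy.sum(x * numpy.log(y) + (1 - x) * numpy.log(1 - y))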
- """ - minibatch = len(instances) - x = pylearn.sparse_instance.to_vector(instances, self.input_dimension) - - (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) -# print -# print "instance:", instance -# print "x:", x -# print "OLD y:", y - print "OLD total loss:", loss -# print "gw1:", gw1 -# print "gb1:", gb1 -# print "gw2:", gw2 -# print "gb2:", gb2 - - self.parameters.w1 *= (1 - self.weight_decay) - self.parameters.w2 *= (1 - self.weight_decay) - - # SGD update - self.parameters.w1 -= self.learning_rate * gw1 / minibatch - self.parameters.b1 -= self.learning_rate * gb1 / minibatch - self.parameters.w2 -= self.learning_rate * gw2 / minibatch - self.parameters.b2 -= self.learning_rate * gb2 / minibatch - -# # Recompute the loss, to make sure it's descreasing -# (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) -## print "NEW y:", y -# print "NEW total loss:", loss -## print "h:", h -## print self.parameters diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/simple_autoassociator/parameters.py --- a/sandbox/simple_autoassociator/parameters.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -""" -Parameters (weights) used by the L{Model}. -""" - -import numpy - -class Parameters: - """ - Parameters used by the L{Model}. - """ - def __init__(self, input_dimension, hidden_dimension, randomly_initialize, random_seed): - """ - Initialize L{Model} parameters. - @param randomly_initialize: If True, then randomly initialize - according to the given seed. If False, then just use zeroes. - """ - if randomly_initialize: - numpy.random.seed(random_seed) - self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension - self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) - #self.b2 = numpy.array([10, 0, 0, -10]) - else: - self.w1 = numpy.zeros((input_dimension, hidden_dimension)) - self.w2 = numpy.zeros((hidden_dimension, input_dimension)) - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) - - def __str__(self): - s = "" - s += "w1: %s\n" % self.w1 - s += "b1: %s\n" % self.b1 - s += "w2: %s\n" % self.w2 - s += "b2: %s\n" % self.b2 - return s diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/README.txt --- a/sandbox/sparse_random_autoassociator/README.txt Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -Since simple_aa doesn't work, this probably doesn't either. diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/__init__.py diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/globals.py --- a/sandbox/sparse_random_autoassociator/globals.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -""" -Global variables. 
-""" - -INPUT_DIMENSION = 1000 -HIDDEN_DIMENSION = 20 -LEARNING_RATE = 0.1 -LR = LEARNING_RATE -SEED = 666 -ZERO_SAMPLE_SIZE = 50 -#ZERO_SAMPLE_SIZE = 250 -MARGIN = 0.25 -#MARGIN = 0.0 diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/graph.py --- a/sandbox/sparse_random_autoassociator/graph.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -""" -Theano graph for an autoassociator for sparse inputs, which will be trained -using Ronan Collobert + Jason Weston's sampling trick (2008). -@todo: Make nearly everything private. -""" - -from globals import MARGIN - -from pylearn.nnet_ops import sigmoid, binary_crossentropy -from theano import tensor as t -from theano.tensor import dot -xnonzero = t.dvector() -w1nonzero = t.dmatrix() -b1 = t.dvector() -w2nonzero = t.dmatrix() -w2zero = t.dmatrix() -b2nonzero = t.dvector() -b2zero = t.dvector() -h = sigmoid(dot(xnonzero, w1nonzero) + b1) -ynonzero = sigmoid(dot(h, w2nonzero) + b2nonzero) -yzero = sigmoid(dot(h, w2zero) + b2zero) - -# May want to weight loss wrt nonzero value? e.g. MARGIN violation for -# 0.1 nonzero is not as bad as MARGIN violation for 0.2 nonzero. -def hingeloss(MARGIN): - return -MARGIN * (MARGIN < 0) -nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN) -zeroloss = hingeloss(-t.max(-(ynonzero)) - yzero - MARGIN) -# xnonzero sensitive loss: -#nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN - xnonzero) -#zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN) -loss = t.sum(nonzeroloss) + t.sum(zeroloss) - -#loss = t.sum(binary_crossentropy(ynonzero, xnonzero)) + t.sum(binary_crossentropy(yzero, t.constant(0))) - -(gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero]) - -import theano.compile - -inputs = [xnonzero, w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero] -outputs = [ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero] -trainfn = theano.compile.function(inputs, outputs) diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/main.py --- a/sandbox/sparse_random_autoassociator/main.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -#!/usr/bin/python -""" - An autoassociator for sparse inputs, using Ronan Collobert + Jason - Weston's sampling trick (2008). - - The learned model is:: - h = sigmoid(dot(x, w1) + b1) - y = sigmoid(dot(h, w2) + b2) - - We assume that most of the inputs are zero, and hence that - we can separate x into xnonzero, x's nonzero components, and - xzero, a sample of the zeros. We sample---randomly without - replacement---ZERO_SAMPLE_SIZE zero columns from x. - - The desideratum is that every nonzero entry is separated from every - zero entry by margin at least MARGIN. - For each ynonzero, we want it to exceed max(yzero) by at least MARGIN. - For each yzero, we want it to be exceed by min(ynonzero) by at least MARGIN. - The loss is a hinge loss (linear). The loss is irrespective of the - xnonzero magnitude (this may be a limitation). Hence, all nonzeroes - are equally important to exceed the maximum yzero. - - (Alternately, there is a commented out binary xent loss.) - - LIMITATIONS: - - Only does pure stochastic gradient (batchsize = 1). - - Loss is irrespective of the xnonzero magnitude. - - We will always use all nonzero entries, even if the training - instance is very non-sparse. 
-""" - - -import numpy - -nonzero_instances = [] -nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) -nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) -nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) - -import model -model = model.Model() - -for i in xrange(100000): - # Select an instance - instance = nonzero_instances[i % len(nonzero_instances)] - - # SGD update over instance - model.update(instance) diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/model.py --- a/sandbox/sparse_random_autoassociator/model.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -""" -The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason -Weston's sampling trick (2008). -""" - -from graph import trainfn -import parameters - -import globals -from globals import LR - -import numpy -import random -random.seed(globals.SEED) - -def _select_indices(instance): - """ - Choose nonzero and zero indices (feature columns) of the instance. - We select B{all} nonzero indices. - We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly, - without replacement. - @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter - an endless loop. - @return: (nonzero_indices, zero_indices) - """ - # Get the nonzero indices - nonzero_indices = instance.keys() - nonzero_indices.sort() - - # Get the zero indices - # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop. - zero_indices = [] - while len(zero_indices) < globals.ZERO_SAMPLE_SIZE: - idx = random.randint(0, globals.INPUT_DIMENSION - 1) - if idx in nonzero_indices or idx in zero_indices: continue - zero_indices.append(idx) - zero_indices.sort() - - return (nonzero_indices, zero_indices) - -class Model: - def __init__(self): - self.parameters = parameters.Parameters(randomly_initialize=True) - - def update(self, instance): - """ - Update the L{Model} using one training instance. - @param instance: A dict from feature index to (non-zero) value. - @todo: Should assert that nonzero_indices and zero_indices - are correct (i.e. are truly nonzero/zero). - """ - (nonzero_indices, zero_indices) = _select_indices(instance) - # No update if there aren't any non-zeros. 
- if len(nonzero_indices) == 0: return - xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices]) - print - print "xnonzero:", xnonzero - - (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) - print "OLD ynonzero:", ynonzero - print "OLD yzero:", yzero - print "OLD total loss:", loss - - # SGD update - self.parameters.w1[nonzero_indices, :] -= LR * gw1nonzero - self.parameters.b1 -= LR * gb1 - self.parameters.w2[:, nonzero_indices] -= LR * gw2nonzero - self.parameters.w2[:, zero_indices] -= LR * gw2zero - self.parameters.b2[nonzero_indices] -= LR * gb2nonzero - self.parameters.b2[zero_indices] -= LR * gb2zero - - # Recompute the loss, to make sure it's descreasing - (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) - print "NEW ynonzero:", ynonzero - print "NEW yzero:", yzero - print "NEW total loss:", loss diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/sparse_random_autoassociator/parameters.py --- a/sandbox/sparse_random_autoassociator/parameters.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -""" -Parameters (weights) used by the L{Model}. -""" - -import numpy -import globals - -class Parameters: - """ - Parameters used by the L{Model}. - """ - def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED): - """ - Initialize L{Model} parameters. - @param randomly_initialize: If True, then randomly initialize - according to the given seed. If False, then just use zeroes. 
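The update above only writes into the rows and columns of w1, w2 and b2 selected by nonzero_indices and the sampled zero_indices; a stripped-down numpy sketch of that fancy-indexing pattern, with hypothetical gradients standing in for trainfn's outputs:

import numpy

rng = numpy.random.RandomState(0)
INPUT_DIMENSION, HIDDEN_DIMENSION, LR = 1000, 20, 0.1
w1 = numpy.zeros((INPUT_DIMENSION, HIDDEN_DIMENSION))
w2 = numpy.zeros((HIDDEN_DIMENSION, INPUT_DIMENSION))

nonzero_indices = [1, 5, 9]                      # every nonzero column of the instance
zero_indices = sorted(rng.permutation(INPUT_DIMENSION)[:50].tolist())
                                                 # random zero columns (the real code also skips nonzero ones)

gw1nonzero = rng.randn(len(nonzero_indices), HIDDEN_DIMENSION)   # stand-in gradients
gw2nonzero = rng.randn(HIDDEN_DIMENSION, len(nonzero_indices))
gw2zero    = rng.randn(HIDDEN_DIMENSION, len(zero_indices))

w1[nonzero_indices, :] -= LR * gw1nonzero        # only the selected slices are touched
w2[:, nonzero_indices] -= LR * gw2nonzero
w2[:, zero_indices]    -= LR * gw2zero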
- """ - if randomly_initialize: - numpy.random.seed(seed) - self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension - self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) - else: - self.w1 = numpy.zeros((input_dimension, hidden_dimension)) - self.w2 = numpy.zeros((hidden_dimension, input_dimension)) - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) diff -r 27b1344a57b1 -r 8fff4bc26f4c sandbox/statscollector.py --- a/sandbox/statscollector.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ - -# Here is how I see stats collectors: - -def my_stats(graph): - graph.mse=examplewise_mean(square_norm(graph.residue)) - graph.training_loss=graph.regularizer+examplewise_sum(graph.nll) - return [graph.mse,graph.training_loss] - - -# def my_stats(residue,nll,regularizer): -# mse=examplewise_mean(square_norm(residue)) -# training_loss=regularizer+examplewise_sum(nll) -# set_names(locals()) -# return ((residue,nll),(regularizer),(),(mse,training_loss)) -# my_stats_collector = make_stats_collector(my_stats) -# -# where make_stats_collector calls my_stats(examplewise_fields, attributes) to -# construct its update function, and figure out what are the input fields (here "residue" -# and "nll") and input attributes (here "regularizer") it needs, and the output -# attributes that it computes (here "mse" and "training_loss"). Remember that -# fields are examplewise quantities, but attributes are not, in my jargon. -# In the above example, I am highlighting that some operations done in my_stats -# are examplewise and some are not. I am hoping that theano Ops can do these -# kinds of internal side-effect operations (and proper initialization of these hidden -# variables). I expect that a StatsCollector (returned by make_stats_collector) -# knows the following methods: -# stats_collector.input_fieldnames -# stats_collector.input_attribute_names -# stats_collector.output_attribute_names -# stats_collector.update(mini_dataset) -# stats_collector['mse'] -# where mini_dataset has the input_fieldnames() as fields and the input_attribute_names() -# as attributes, and in the resulting dataset the output_attribute_names() are set to the -# proper numeric values. 
- - - -import theano -from theano import tensor as t -from Learner import Learner -from lookup_list import LookupList - -class StatsCollectorModel(AttributesHolder): - def __init__(self,stats_collector): - self.stats_collector = stats_collector - self.outputs = LookupList(stats_collector.output_names,[None for name in stats_collector.output_names]) - # the statistics get initialized here - self.update_function = theano.function(input_attributes+input_fields,output_attributes+output_fields,linker="c|py") - for name,value in self.outputs.items(): - self.__setattribute__(name,value) - def update(self,dataset): - input_fields = dataset.fields()(self.stats_collector.input_field_names) - input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names) - self.outputs._values = self.update_function(input_attributes+input_fields) - for name,value in self.outputs.items(): - self.__setattribute__(name,value) - def __call__(self): - return self.outputs - def attributeNames(self): - return self.outputs.keys() - -class StatsCollector(AttributesHolder): - - def __init__(self,input_attributes, input_fields, outputs): - self.input_attributes = input_attributes - self.input_fields = input_fields - self.outputs = outputs - self.input_attribute_names = [v.name for v in input_attributes] - self.input_field_names = [v.name for v in input_fields] - self.output_names = [v.name for v in output_attributes] - - def __call__(self,dataset=None): - model = StatsCollectorModel(self) - if dataset: - self.update(dataset) - return model - -if __name__ == '__main__': - def my_statscollector(): - regularizer = t.scalar() - nll = t.matrix() - class_error = t.matrix() - total_loss = regularizer+t.examplewise_sum(nll) - avg_nll = t.examplewise_mean(nll) - avg_class_error = t.examplewise_mean(class_error) - for name,val in locals().items(): val.name = name - return StatsCollector([regularizer],[nll,class_error],[total_loss,avg_nll,avg_class_error]) - - - - -# OLD DESIGN: -# -# class StatsCollector(object): -# """A StatsCollector object is used to record performance statistics during training -# or testing of a learner. It can be configured to measure different things and -# accumulate the appropriate statistics. From these statistics it can be interrogated -# to obtain performance measures of interest (such as maxima, minima, mean, standard -# deviation, standard error, etc.). Optionally, the observations can be weighted -# (yielded weighted mean, weighted variance, etc., where applicable). The statistics -# that are desired can be specified among a list supported by the StatsCollector -# class or subclass. When some statistics are requested, others become automatically -# available (e.g., sum or mean).""" -# -# default_statistics = [mean,standard_deviation,min,max] -# -# __init__(self,n_quantities_observed, statistics=default_statistics): -# self.n_quantities_observed=n_quantities_observed -# -# clear(self): -# raise NotImplementedError -# -# update(self,observations): -# """The observations is a numpy vector of length n_quantities_observed. Some -# entries can be 'missing' (with a NaN entry) and will not be counted in the -# statistics.""" -# raise NotImplementedError -# -# __getattr__(self, statistic) -# """Return a particular statistic, which may be inferred from the collected statistics. 
-# The argument is a string naming that statistic.""" - - - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/setup.py Mon Mar 30 20:48:04 2009 -0400 @@ -0,0 +1,14 @@ +#!/bin/env python + +from ez_setup import use_setuptools +use_setuptools() +from setuptools import setup, find_packages, Extension, Library +setup(name="Pylearn", + version="0.1", + description="Pylearn", + long_description="""Machine learning toolkit""", + author="LISA", + author_email="pylearn-dev@googlegroups.com", + packages=find_packages(exclude='tests'), +) + diff -r 27b1344a57b1 -r 8fff4bc26f4c sparse_instance.py --- a/sparse_instance.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -""" -Sparse instances. -Each instance is represented as dict with key dimension. -Dimensions not present in the dict have value 0. -""" - -from numpy import zeros - -def to_vector(instances, dimensions): - """ - Convert sparse instances to vectors. - @type instances: list of sparse instances - @param dimensions: The number of dimensions in each instance. - @rtype: numpy matrix (instances x dimensions) - @todo: Allow this function to convert SINGLE instances (not lists). - """ - v = zeros((len(instances), dimensions)) - l = len(instances) - for i in range(l): - for idx in instances[i].keys(): - v[i][idx] = instances[i][idx] - return v diff -r 27b1344a57b1 -r 8fff4bc26f4c squashfn.py --- a/squashfn.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ - -def squashfn(str): - if str == "sigmoid": - import theano.tensor.nnet as nnet - return nnet.sigmoid - elif str == "tanh": - import theano.tensor as t - return t.tanh - elif str == "softsign": - from theano.sandbox.softsign import softsign - return softsign - else: assert 0 - - diff -r 27b1344a57b1 -r 8fff4bc26f4c stat_ops.py --- a/stat_ops.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ - -import theano -from theano import gof -from theano import tensor -import numpy - - -class ExampleWiseMean(gof.Op): - - def __init__(self): - self.destroy_map = {0: [1, 2]} - - def make_node(self, x): - return gof.Apply(self, - [x, tensor.value(float('nan')), tensor.value(0)], - [tensor.Tensor(dtype = 'float64', - broadcastable = x.type.broadcastable)()]) - - def perform(self, node, (x, sum, n), (out,)): - if numpy.isnan(sum).any(): - sum.resize(x.shape, refcheck=0) - sum[:] = x - else: - sum += x - n += 1 - out[0] = sum / n - - def c_code(self, name, node, (x, sum, n), (out, ), sub): - return """ - PyObject* multi; - int nelems; - if (isnan(((double*)(%(sum)s->data))[0])) { - PyArray_Dims dims; - dims.len = %(x)s->nd; - dims.ptr = %(x)s->dimensions; - PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER); - multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // Copy %(x)s in %(sum)s - *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1); - PyArray_MultiIter_NEXT(multi); - } - } - else { - // Add some error checking on the size of x - multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // Add %(x)s to %(sum)s - *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1); - PyArray_MultiIter_NEXT(multi); - } - } - ((npy_int64*)(%(n)s->data))[0]++; - int n = ((npy_int64*)(%(n)s->data))[0]; - if (%(out)s == NULL) { - %(out)s = 
(PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0); - } - multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // %(out)s <- %(sum)s / %(n)s - *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n; - PyArray_MultiIter_NEXT(multi); - } - """ % dict(locals(), **sub) - - - -if __name__ == '__main__': - - vectors = numpy.random.RandomState(666).rand(10, 2) - - x = tensor.dvector() - e = ExampleWiseMean()(x) - - # f = theano.function([x], [e], linker = 'py') - - # for i, v in enumerate(vectors): - # print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) - - # print - - f = theano.function([x], [e], linker = 'c|py') - - for i, v in enumerate(vectors): - print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) - - - - diff -r 27b1344a57b1 -r 8fff4bc26f4c stopper.py --- a/stopper.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,122 +0,0 @@ -"""Early stopping iterators - -The idea here is to supply early-stopping heuristics that can be used in the -form: - - stopper = SomeEarlyStopper() - - for i in stopper(): - # train from data - if i.set_score: - i.score = validation_score - - -So far I only have one heuristic, so maybe this won't scale. -""" - -class Stopper(object): - - def train(self, data, update_rows_fn, update, validate, save=None): - """Return the best model trained on data - - Parameters: - data - a thing that accepts getitem(), or a tuple of such things - update_rows_fn - fn : int --> - update - fn: update an internal model from elements of data - validate - fn: evaluate an internal model based on elements of data - save - fn: return a copy of the internal model - - The body of this function exhausts the iterator, and trains a - model using early stopping in the process. - """ - - best = None - for stp in self: - i = stp.iter - - # call update on some training set rows - t_rows = update_rows_fn(i) - if isinstance(data, (tuple, list)): - update(*[d[t_rows] for d in data]) - else: - update(data[t_rows]) - - if stp.set_score: - stp.score = validate() - if (stp.score < stp.best_score) and save: - best = save() - return best - - def find_min(self, step, check, save): - best = None - for stp in self: - step() - if stp.set_score: - stp.score = check() - if (stp.score < stp.best_score) and save: - best = (save(), stp.iter, stp.score) - return best - - - -class ICML08Stopper(Stopper): - @staticmethod - def icml08(ntrain, batchsize): - """Some setting similar to what I used for ICML08 submission""" - #TODO: what did I actually use? put that in here. 
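The early-stopping rule that ICML08Stopper.next() implements below can be summarized as: keep iterating while still inside the initial wait or within patience times the best-scoring iteration so far, score the validation set every set_score_interval steps, and record a new best only when the score improves by at least the min_improvement factor. A plain-Python illustration with invented numbers:

initial_wait, v_int, min_improvement, patience, hard_limit = 10, 2, 0.96, 2.0, 1000

def toy_valid_score(i):                          # a made-up validation curve
    return 1.0 / (1 + i) if i < 30 else 0.04

best_score, best_iter, it = float('inf'), -1, 0
while (it < initial_wait or it < patience * best_iter) and it < hard_limit:
    if it % v_int == 0:
        score = toy_valid_score(it)
        if score < best_score * min_improvement:
            best_score, best_iter = score, it
    it += 1
# the loop leaves off near patience * best_iter, i.e. roughly twice the
# iteration of the last sufficiently large improvement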
- return ICML08Stopper(30*ntrain/batchsize, - ntrain/batchsize, 0.96, 2.0, 100000000) - - def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit): - self.initial_wait = i_wait - self.set_score_interval = v_int - self.min_improvement = min_improvement - self.patience = patience - self.hard_limit = hard_limit - - self.best_score = float('inf') - self.best_iter = -1 - self.iter = -1 - - self.set_score = False - self.score = None - - def __iter__(self): - return self - - E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score' - def next(self): - - #print 'ICML08 stopper, were doing a next' - - if self.set_score: #left over from last time - if self.score is None: - raise Exception(ICML08Stopper.E_set_score) - if self.score < (self.best_score * self.min_improvement): - (self.best_score, self.best_iter) = (self.score, self.iter) - self.score = None #un-set it - - - starting = self.iter < self.initial_wait - waiting = self.iter < (self.patience * self.best_iter) - if starting or waiting: - # continue to iterate - self.iter += 1 - if self.iter == self.hard_limit: - raise StopIteration - self.set_score = (self.iter % self.set_score_interval == 0) - return self - - raise StopIteration - - -class NStages(ICML08Stopper): - """Run for a fixed number of steps, checking validation set every so - often.""" - def __init__(self, hard_limit, v_int): - ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit) - - #TODO: could optimize next() function. Most of what's in ICML08Stopper.next() - #is not necessary - - diff -r 27b1344a57b1 -r 8fff4bc26f4c test_speed.py --- a/test_speed.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -import numpy -from dataset import * -from misc import * -def test_speed(array, ds): - print "test_speed", ds.__class__ - - mat = numpy.random.rand(400,100) - - @print_timing - def f_array_full(a): - a+1 - @print_timing - def f_array_index(a): - for id in range(a.shape[0]): -# pass - a[id]+1 -# a[id]*mat - @print_timing - def f_array_iter(a): - for r in a: -# pass - r+1 -# r*mat - @print_timing - def f_ds_index(ds): - for id in range(len(ds)): -# pass - ds[id][0]+1 -# ds[id][0]*mat - @print_timing - def f_ds_iter(ds): - for ex in ds: -# pass - ex[0]+1 -# a[0]*mat - @print_timing - def f_ds_mb1(ds,mb_size): - for exs in ds.minibatches(minibatch_size = mb_size): - for ex in exs: -# pass - ex[0]+1 -# ex[0]*mat - @print_timing - def f_ds_mb2(ds,mb_size): - for exs in ds.minibatches(minibatch_size = mb_size): -# pass - exs[0]+1 -# ex[0]*mat - - f_array_full(array) - f_array_index(array) - f_array_iter(array) - - f_ds_index(ds) - f_ds_iter(ds) - - f_ds_mb1(ds,10) - f_ds_mb1(ds,100) - f_ds_mb1(ds,1000) - f_ds_mb1(ds,10000) - f_ds_mb2(ds,10) - f_ds_mb2(ds,100) - f_ds_mb2(ds,1000) - f_ds_mb2(ds,10000) - -if __name__=='__main__': - a2 = numpy.random.rand(100000,400) - ds1 = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)}) - test_speed(a2,ds1) - a1 = numpy.random.rand(100000,40) - ds4 = ArrayDataSet(a1,LookupList(["f"+str(x)for x in range(a1.shape[1])], - range(a1.shape[1]))) - test_speed(a2,ds4) - ds2=CachedDataSet(ds1,cache_all_upon_construction=False) - test_speed(a2,ds2) - ds3=CachedDataSet(ds1,cache_all_upon_construction=True) - test_speed(a2,ds3) - del a2,ds1,ds2,ds3 - diff -r 27b1344a57b1 -r 8fff4bc26f4c version.py --- a/version.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,292 +0,0 @@ -import subprocess as _subprocess -import imp as _imp -import 
sys -import os - - -_cache = dict() - -def src_version(module_name): - """Return compact identifier of module code. - - @return: compact identifier of module code. - @rtype: string - - @note: This function tries to establish that the source files and the repo - are syncronized. It raises an Exception if there are un-tracked '.py' - files, or if there are un-committed modifications. This implementation uses - "hg id" to establish this. The code returned by "hg id" is not affected by - hg pull, but pulling might remove the " tip" string which might have - appeared. This implementation ignores the " tip" information, and only - uses the code. - - @note: This implementation is assumes that the import directory is under - version control by mercurial. - - """ - - if module_name not in _cache: - - try : - location = _imp.find_module(module_name)[1] - except ImportError: - _cache[module_name] = None - return None - #print 'location:', location - isdir = False - if os.path.isdir(location) : - isdir = True - elif os.path.isfile(location) : - isdir = False - else : - # SEEMS THIS CASE EXIST, FOR WEIRD BUILTIN FUNCTIONS - #print location,": it's 'not a dir, it's not a file, it's superman!" - #raise Exception('Unknown location or file type') - _cache[module_name] = None - return None - - - # we're dealing with a dir - if isdir : - - # under hg? - if not os.path.exists( os.path.join( location , '.hg') ) : - _cache[module_name] = None - return None - - status = _subprocess.Popen(('hg','st'),cwd=location,stdout=_subprocess.PIPE).communicate()[0] - #print 'status =', status - #TODO: check that the process return code is 0 (ticket #45) - - #status_codes = [line[0] for line in if line and line[0] != '?'] - for line in status.split('\n'): - if not line: continue - if line[0] != '?': - raise Exception('Uncommitted modification to "%s" in %s (%s)' - %(line[2:], __name__,location)) - if line[0] == '?' and line[-3:] == '.py': - raise Exception('Untracked file "%s" in %s (%s)' - %(line[2:], __name__, location)) - - hg_id = _subprocess.Popen(('hg','id'),cwd=location,stdout=_subprocess.PIPE).communicate()[0] - - # This asserts my understanding of hg id return values - # There is mention in the doc that it might return two parent hash codes - # but I've never seen it, and I dont' know what it means or how it is - # formatted. - tokens = hg_id.split(' ') - assert len(tokens) <= 2 - assert len(tokens) >= 1 - assert tokens[0][-1] != '+' # the trailing + indicates uncommitted changes - if len(tokens) == 2: - assert tokens[1] == 'tip\n' - - _cache[module_name] = tokens[0] - - # we're dealing with a file - if not isdir : - - folder = os.path.split( os.path.abspath(location) )[0] - # under hg? - if not os.path.exists( os.path.join( folder , '.hg') ) : - _cache[module_name] = None - return None - - status = _subprocess.Popen(('hg','st',location),cwd=folder,stdout=_subprocess.PIPE).communicate()[0] - #print 'status =', status - - #status_codes = [line[0] for line in if line and line[0] != '?'] - for line in status.split('\n'): - if not line: continue - if line[0] != '?': - raise Exception('Uncommitted modification to "%s" in %s (%s)' - %(line[2:], location,folder)) - if line[0] == '?' 
and line[-3:] == '.py': - raise Exception('Untracked file "%s" in %s (%s)' - %(line[2:], location, folder)) - - hg_id = _subprocess.Popen(('hg','id'),cwd=folder,stdout=_subprocess.PIPE).communicate()[0] - - # This asserts my understanding of hg id return values - # There is mention in the doc that it might return two parent hash codes - # but I've never seen it, and I dont' know what it means or how it is - # formatted. - tokens = hg_id.split(' ') - assert len(tokens) <= 2 - assert len(tokens) >= 1 - if tokens[0][-1] == '+' : - tokens[0] = tokens[0][:-1] # the change was not on this file - if len(tokens) == 2: - assert tokens[1] == 'tip\n' - - _cache[module_name] = tokens[0] - - - return _cache[module_name] - -_unknown_version = 'unknown version' - -def hg_version(dirname, filenames=None): - """Return current changeset of directory I{dirname}. - - @type filename: list of str (or default: None) - @param filename: if specified, we ignore modifications to other files. - - @rtype: tuple (last changeset, modified) - - """ - if type(filenames) not in (list, tuple, type(None)): - raise TypeError(filenames) - - #may raise exception, for example if hg is not visible via PATH - status_proc = _subprocess.Popen(('hg','st'), cwd=dirname, - stdout=_subprocess.PIPE, stderr=_subprocess.PIPE) - status = status_proc.communicate()[0] #read stdout into buffer - if status_proc.returncode != 0: - raise OSError('hg returned %i, maybe %s is not under hg control?', - (status_proc.returncode, dirname)) - - #may raise exception, for example if hg is not visible via PATH - id_proc = _subprocess.Popen(('hg','id', '-i'), cwd=dirname, - stdout=_subprocess.PIPE, stderr=_subprocess.PIPE) - id_stdout = id_proc.communicate()[0] - if id_proc.returncode != 0: - raise OSError('hg returned %i, maybe %s is not under hg control?', - (id_proc.returncode, dirname)) - - care_about = (lambda some_file : True) if filenames is None \ - else (lambda some_file : some_file in filenames) - - # parse status codes for what we care about - care_about_mod = False - for line in status.split('\n'): - if not line: #empty lines happen - continue - line_file = line[2:] - if line[0] != '?' and care_about(line_file): - care_about_mod = True - #raise Exception('Uncommitted modification', - #os.path.join(dirname, line_file)) - if line[0] == '?' 
and line[-3:] == '.py': - print >> sys.stderr, 'WARNING: untracked file', os.path.join(dirname, line_file) - - # id_stdout is 12 hex digits followed by '+\n' or '\n' - # return the trailing '+' character only if there were changes to files that - # the caller cares about (named in filenames) - modified = (id_stdout[12] == '+') - assert len(id_stdout) in (13, 14) #sanity check - if modified and care_about_mod : - return id_stdout[:13] - else: - return id_stdout[:12] - -def _import_id_py_source(location): - try: - dirname = os.path.dirname(location[1]) - basename = os.path.basename(location[1]) - return hg_version(dirname, [basename]) - except OSError, e: - print >> sys.stderr, 'IGNORNING', e - return _unknown_version + ' PY_SOURCE' - -def _import_id_py_compiled(location): - #a .pyc file was found, but no corresponding .py - return _unknown_version + ' PYC_COMPILED' - -def _import_id_pkg_directory(location): - try: - return hg_version(location[1]) - except OSError, e: - print >> sys.stderr, 'IGNORNING', e - return _unknown_version + ' PKG_DIRECTORY' - -def _import_id(tag): - try : - location = _imp.find_module(tag) - except ImportError, e: #raise when tag is not found - return e #put this in the cache, import_id will raise it - - #the find_module was successful, location is valid - resource_type = location[2][2] - - if resource_type == _imp.PY_SOURCE: - return _import_id_py_source(location) - if resource_type == _imp.PY_COMPILED: - return _import_id_py_compiled(location) - if resource_type == _imp.C_EXTENSION: - raise NoteImplementedError - if resource_type == _imp.PY_RESOURCE: - raise NoteImplementedError - if resource_type == _imp.PKG_DIRECTORY: - return _import_id_pkg_directory(location) - if resource_type == _imp.C_BUILTIN: - raise NoteImplementedError - if resource_type == _imp.PY_FROZEN: - raise NoteImplementedError - - assert False #the list of resource types above should be exhaustive - -def import_id(tag): - """Return an identifier of the code imported by 'import '. - - @param tag: a module or file name - @type tag: string - - @rtype: string - @return: identifier of the code imported by 'import '. - - This high-level function might do different things depending on, for - example, whether I{tag} identifies a file or a directory, or whether the - named entity is under some sort of version/revision control. - - Versions are sought in the following order: - 0. If I{tag} is 'python' then sys.version will be returned - 1. If I{tag} names a file or folder under revision control, this function - will attempt to guess which one, and return a string that identifies the - running code (a revision id, not the whole file!) - 2. If I{tag} names a module with a __version__ attribute, then that - attribute will be returned as a string. - 3. The string starting with 'unknown version' will be returned for other valid modules. - 4. An exception will be raise for non-existent modules. - - @note: This function may import the named entity in order to return a - __version__ module attribute. - - """ - if tag not in import_id.cache: - import_id.cache[tag] = _import_id(tag) - - #in the case of bad module names, we cached the ImportError exception - rval = import_id.cache[tag] - if isinstance(rval, Exception): - raise rval - return rval -import_id.cache = {'python':sys.version} - -def get_all_src_versions() : - """ - Get the version of all loaded module. - Calls src_version on all loaded modules. These modules are found - using sys.modules. - - Returns a dictionnary: name->version. 
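A small usage sketch for the helpers above; the exact output depends on whether the package sits in a mercurial checkout (illustrative only):

import version

print version.import_id('python')                # special-cased: returns sys.version
print version.import_id('version')               # hg changeset id, or 'unknown version ...'
print len(version.get_all_src_versions())        # one entry per loaded module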
- - @RETURN dict Dictionnary (module's name) -> (version) - @SEE src_version - """ - allmodules = sys.modules - d = dict() - for m in allmodules : - try: - d[m] = import_id(m) - except: - pass - return d - - -if __name__ == "__main__" : - - if len(sys.argv) == 2 : - print 'testing on', sys.argv[1] - print import_id(sys.argv[1]) - diff -r 27b1344a57b1 -r 8fff4bc26f4c weights.py --- a/weights.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -""" -Routine to initialize weights. - -@note: We assume that numpy.random.seed() has already been performed. -""" - -from math import pow, sqrt -import numpy.random - -sqrt3 = sqrt(3.0) -def random_weights(nin, nout, scale_by=1./sqrt3, power=0.5): - """ - Generate an initial weight matrix with nin inputs (rows) and nout - outputs (cols). - Each weight is chosen uniformly at random to be in range: - [-scale_by*sqrt(3)/pow(nin,power), +scale_by*sqrt(3)/pow(nin,power)] - @note: Play with scale_by, but reasonable values are <=1, maybe 1./sqrt3 - power=0.5 is strongly recommanded (see below). - - Suppose these weights w are used in dot products as follows: - output = w' input - If w ~ Uniform(-r,r) and Var[input_i]=1 and x_i's are independent, then - Var[w]=r2/3 - Var[output] = Var[ sum_{i=1}^d w_i input_i] = d r2 / 3 - To make sure that variance is not changed after the dot product, - we therefore want Var[output]=1 and r = sqrt(3)/sqrt(d). This choice - corresponds to the default values scale_by=sqrt(3) and power=0.5. - More generally we see that Var[output] = Var[input] * scale_by. - - Now, if these are weights in a deep multi-layer neural network, - we would like the top layers to be initially more linear, so as to let - gradients flow back more easily (this is an explanation by Ronan Collobert). - To achieve this we want scale_by smaller than 1. - Ronan used scale_by=1/sqrt(3) (by mistake!) and got better results than scale_by=1 - in the experiment of his ICML'2008 paper. - Note that if we have a multi-layer network, ignoring the effect of the tanh non-linearity, - the variance of the layer outputs would go down roughly by a factor 'scale_by' at each - layer (making the layers more linear as we go up towards the output). - """ - return (numpy.random.rand(nin, nout) * 2.0 - 1) * scale_by * sqrt3 / pow(nin,power) diff -r 27b1344a57b1 -r 8fff4bc26f4c xlogx.py --- a/xlogx.py Thu Nov 20 06:38:06 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ - -import theano -from theano import tensor, scalar -import numpy - -class XlogX(scalar.UnaryScalarOp): - """ - Compute X * log(X), with special case 0 log(0) = 0. - """ - @staticmethod - def st_impl(x): - if x == 0.0: - return 0.0 - return x * numpy.log(x) - def impl(self, x): - return XlogX.st_impl(x) - def grad(self, (x,), (gz,)): - return [gz * (1 + scalar.log(x))] - def c_code(self, node, name, (x,), (z,), sub): - if node.inputs[0].type in [scalar.float32, scalar.float64]: - return """%(z)s = - %(x)s == 0.0 - ? 0.0 - : %(x)s * log(%(x)s);""" % locals() - raise NotImplementedError('only floatingpoint is implemented') -scalar_xlogx = XlogX(scalar.upgrade_to_float, name='scalar_xlogx') -xlogx = tensor.Elemwise(scalar_xlogx, name='xlogx') -
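As a closing numerical check of the variance argument in random_weights above (illustrative, with invented sizes; the function is restated inline): with power=0.5 and scale_by=1, a dot product with unit-variance, independent inputs keeps roughly unit variance, while the function's actual default scale_by=1/sqrt(3) shrinks it:

import numpy
numpy.random.seed(0)

def random_weights(nin, nout, scale_by, power=0.5):
    sqrt3 = numpy.sqrt(3.0)
    return (numpy.random.rand(nin, nout) * 2.0 - 1) * scale_by * sqrt3 / nin ** power

nin, nout = 500, 200
x = numpy.random.randn(10000, nin)               # unit-variance, independent inputs

out = numpy.dot(x, random_weights(nin, nout, scale_by=1.0))
assert abs(out.var() - 1.0) < 0.1                # d * r^2 / 3 = 1 when scale_by = 1

out = numpy.dot(x, random_weights(nin, nout, scale_by=1.0 / numpy.sqrt(3.0)))
assert abs(out.var() - 1.0 / 3.0) < 0.1          # the default shrinks the output variance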