changeset 242:ef70a665aaaf

Hmm... that was committed by Fred, I think; I got confused by Mercurial, I think
author delallea@opale.iro.umontreal.ca
date Fri, 30 May 2008 10:19:16 -0400
parents ddb88a8e9fd2 (current diff) 97f35d586727 (diff)
children c8f19a9eb10f
files dataset.py
diffstat 4 files changed, 20 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/dataset.py	Fri May 30 10:14:46 2008 -0400
+++ b/dataset.py	Fri May 30 10:19:16 2008 -0400
@@ -987,7 +987,7 @@
         for fieldname, fieldcolumns in self.fields_columns.items():
             if type(fieldcolumns) is int:
                 assert fieldcolumns>=0 and fieldcolumns<data_array.shape[1]
-                if 0:
+                if 1:
                     #I changed this because it didn't make sense to me,
                     # and it made it more difficult to write my learner.
                     # If it breaks stuff, let's talk about it.
@@ -1054,6 +1054,8 @@
                 assert offset>=0 and offset<len(dataset.data)
                 assert offset+minibatch_size<=len(dataset.data)
                 self.current=offset
+                self.columns = [self.dataset.fields_columns[f] 
+                                for f in self.minibatch._names]
             def __iter__(self):
                 return self
             def next(self):
@@ -1062,7 +1064,8 @@
                 if self.current>=self.dataset.data.shape[0]:
                     raise StopIteration
                 sub_data =  self.dataset.data[self.current]
-                self.minibatch._values = [sub_data[self.dataset.fields_columns[f]] for f in self.minibatch._names]
+                self.minibatch._values = [sub_data[c] for c in self.columns]
+
                 self.current+=self.minibatch_size
                 return self.minibatch
 
--- a/denoising_aa.py	Fri May 30 10:14:46 2008 -0400
+++ b/denoising_aa.py	Fri May 30 10:19:16 2008 -0400
@@ -9,6 +9,7 @@
 from nnet_ops import *
 import math
 from misc import *
+from misc_theano import *
 from theano.tensor_random import binomial
 
 def hiding_corruption_formula(seed,average_fraction_hidden):
--- a/misc.py	Fri May 30 10:14:46 2008 -0400
+++ b/misc.py	Fri May 30 10:19:16 2008 -0400
@@ -1,24 +1,3 @@
-
-import theano
-
-class Print(theano.Op):
-    def __init__(self,message=""):
-        self.message=message
-        self.view_map={0:[0]}
-
-    def make_node(self,xin):
-        xout = xin.type.make_result()
-        return theano.Apply(op = self, inputs = [xin], outputs=[xout])
-
-    def perform(self,node,inputs,output_storage):
-        xin, = inputs
-        xout, = output_storage
-        xout[0] = xin
-        print self.message,xin
-
-    def grad(self,input,output_gradients):
-        return output_gradients
-
 
 def unique_elements_list_intersection(list1,list2):
     """
--- a/test_dataset.py	Fri May 30 10:14:46 2008 -0400
+++ b/test_dataset.py	Fri May 30 10:19:16 2008 -0400
@@ -194,38 +194,41 @@
     m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4)
     assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
     for x,y in m:
-        assert len(x)==3
-        assert len(y)==3
-        for id in range(3):
+        assert len(x)==m.minibatch_size
+        assert len(y)==m.minibatch_size
+        for id in range(m.minibatch_size):
             assert (numpy.append(x[id],y[id])==array[i+4]).all()
             i+=1
-    assert i==3
+    assert i==m.n_batches*m.minibatch_size
     del x,y,i,id,m
 
     i=0
     m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4)
     assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
     for x,y in m:
-        assert len(x)==3
-        assert len(y)==3
-        for id in range(3):
+        assert len(x)==m.minibatch_size
+        assert len(y)==m.minibatch_size
+        for id in range(m.minibatch_size):
             assert (numpy.append(x[id],y[id])==array[i+4]).all()
             i+=1
-    assert i==6
+    assert i==m.n_batches*m.minibatch_size
     del x,y,i,id,m
 
     i=0
     m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4)
     assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
     for x,y in m:
-        assert len(x)==3
-        assert len(y)==3
-        for id in range(3):
+        assert len(x)==m.minibatch_size
+        assert len(y)==m.minibatch_size
+        for id in range(m.minibatch_size):
             assert (numpy.append(x[id],y[id])==array[(i+4)%array.shape[0]]).all()
             i+=1
     assert i==m.n_batches*m.minibatch_size
     del x,y,i,id
 
+    #@todo: we can't do minibatch bigger than the size of the dataset???
+    assert have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array)+1,offset=0)
+    assert not have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array),offset=0)
 
 def test_ds_iterator(array,iterator1,iterator2,iterator3):
     l=len(iterator1)
@@ -494,10 +497,7 @@
     print "test_speed"
     import time
     a2 = numpy.random.rand(100000,400)
-    ds = ArrayDataSet(a2,{'x':slice(3),'y':3,'z':[0,2]})###???tuple not tested
-    ds = ArrayDataSet(a2,LookupList(['x','y','z'],[slice(3),3,[0,2]]))###???tuple not tested
     ds = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)})
-    #assert ds==a? should this work?
     mat = numpy.random.rand(400,100)
     @print_timing
     def f_array1(a):