diff dataset.py @ 258:19b14afe04b7

merged
author James Bergstra <bergstrj@iro.umontreal.ca>
date Tue, 03 Jun 2008 16:06:21 -0400
parents 4ad6bc9b4f03 8ec867d12428
children 6e69fb91f3c0 6226ebafefc3
line wrap: on
line diff
--- a/dataset.py	Tue Jun 03 16:05:28 2008 -0400
+++ b/dataset.py	Tue Jun 03 16:06:21 2008 -0400
@@ -1150,6 +1150,7 @@
           def __init__(self,dataset):
               self.dataset=dataset
               self.current=offset
+              self.all_fields = self.dataset.fieldNames()==fieldnames
           def __iter__(self): return self
           def next(self):
               upper = self.current+minibatch_size
@@ -1161,7 +1162,7 @@
               all_fields_minibatch = Example(self.dataset.fieldNames(),
                                              zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size]))
               self.current+=minibatch_size
-              if self.dataset.fieldNames()==fieldnames:
+              if self.all_fields:
                   return all_fields_minibatch
               return Example(fieldnames,[all_fields_minibatch[name] for name in fieldnames])
       return CacheIterator(self)
@@ -1170,8 +1171,31 @@
       if type(i)==int and len(self.cached_examples)>i:
           return self.cached_examples[i]
       else:
-          return DataSet.__getitem__(self,i)
-                      
+          return self.source_dataset[i]
+      
+  def __iter__(self):  # iterate one example at a time, filling cached_examples from source_dataset as needed
+      class CacheIteratorIter(object):
+          def __init__(self,dataset):
+              self.dataset=dataset
+              self.l = len(dataset)  # length is read once, when the iterator is created
+              self.current = 0  # index of the next example to return
+              self.fieldnames = self.dataset.fieldNames()
+              self.example = LookupList(self.fieldnames,[0]*len(self.fieldnames))  # single record object, refilled on every step
+          def __iter__(self): return self
+          def next(self):
+              if self.current>=self.l:
+                  raise StopIteration
+              cache_len = len(self.dataset.cached_examples)
+              if self.current>=cache_len: # this example is not in the cache yet
+                  # fetch it from the source dataset and append it; append only lands at index `current` when current == cache_len
+                  self.dataset.cached_examples.append(
+                      self.dataset.source_dataset[self.current])
+              self.example._values = self.dataset.cached_examples[self.current]  # swap the cached values into the shared record
+              self.current+=1
+              return self.example
+      # NOTE(review): every step returns the same LookupList object; presumably callers must copy it to keep an example - confirm
+      return CacheIteratorIter(self)
+
 class ApplyFunctionDataSet(DataSet):
   """
   A L{DataSet} that contains as fields the results of applying a