# HG changeset patch
# User Frederic Bastien
# Date 1212513765 14400
# Node ID 856d14dc44687bbf7c7d1fa91083d316a7b1a35c
# Parent  7e6edee187e31384aa35ebf174f904b680aeadb6
implemented CachedDataSet.__iter__ as an optimization

diff -r 7e6edee187e3 -r 856d14dc4468 dataset.py
--- a/dataset.py	Tue Jun 03 12:25:53 2008 -0400
+++ b/dataset.py	Tue Jun 03 13:22:45 2008 -0400
@@ -1162,7 +1162,58 @@
             return self.cached_examples[i]
         else:
             return self.source_dataset[i]
-
+
+    def __iter__(self):
+        class CacheIteratorIter(object):
+            def __init__(self,dataset):
+                self.dataset=dataset
+                self.l = len(dataset)
+                self.current = 0
+                self.fieldnames = self.dataset.fieldNames()
+                self.example = LookupList(self.fieldnames,[0]*len(self.fieldnames))
+            def __iter__(self): return self
+            def next(self):
+                if self.current>=self.l:
+                    raise StopIteration
+                cache_len = len(self.dataset.cached_examples)
+                if self.current>=cache_len: # this example is not yet in the cache
+                    # fetch it from the source dataset and append it to the cache
+                    self.dataset.cached_examples.append(
+                        self.dataset.source_dataset[self.current])
+                self.example._values = self.dataset.cached_examples[self.current]
+                self.current+=1
+                return self.example
+
+        return CacheIteratorIter(self)
+
+#        class CachedDataSetIterator(object):
+#            def __init__(self,dataset,fieldnames):#,minibatch_size,n_batches,offset):
+# #                if fieldnames is None: fieldnames = dataset.fieldNames()
+#                # store the resulting minibatch in a lookup-list of values
+#                self.minibatch = LookupList(fieldnames,[0]*len(fieldnames))
+#                self.dataset=dataset
+# #                self.minibatch_size=minibatch_size
+# #                assert offset>=0 and offset=self.l:
+#                    raise StopIteration
+#                sub_data = self.dataset.data[self.current]
+#                self.minibatch._values = [sub_data[c] for c in self.columns]
+
+#                self.current+=self.minibatch_size
+#                return self.minibatch
+
+#        return CachedDataSetIterator(self,self.fieldNames())#,1,0,0)
+
 class ApplyFunctionDataSet(DataSet):
     """
     A L{DataSet} that contains as fields the results of applying a
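
Note (not part of the patch): the new CachedDataSet.__iter__ fills cached_examples lazily, so the first pass over the dataset pays the cost of reading from source_dataset and every later pass is served from the cache. The iterator also reuses a single LookupList and overwrites its _values at every step, so a caller that wants to keep an example across iterations must copy it. Below is a minimal, self-contained Python sketch of the same caching pattern; ToyCachedDataSet and its plain-list source are hypothetical stand-ins for CachedDataSet and source_dataset, and a generator replaces the explicit Python 2 iterator class used in the patch.

    class ToyCachedDataSet(object):
        """Toy stand-in for CachedDataSet: caches examples on first access."""
        def __init__(self, source):
            self.source = source            # stands in for source_dataset
            self.cached_examples = []       # filled lazily, as in the patch

        def __len__(self):
            return len(self.source)

        def __iter__(self):
            for i in range(len(self)):
                if i >= len(self.cached_examples):
                    # first time example i is reached: fetch it and cache it
                    self.cached_examples.append(self.source[i])
                yield self.cached_examples[i]

    d = ToyCachedDataSet([{"x": 0}, {"x": 1}, {"x": 2}])
    assert list(d) == [{"x": 0}, {"x": 1}, {"x": 2}]   # first pass fills the cache
    assert len(d.cached_examples) == 3                 # later passes read the cache
    assert list(d) == [{"x": 0}, {"x": 1}, {"x": 2}]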