# HG changeset patch # User Frederic Bastien # Date 1210970287 14400 # Node ID 6f55e301c687eade6081e3f804a6d7507e3921c7 # Parent 80731832c62bdb8772b5d2368ba8a196c5295df3 optimisation of ArrayDataSet diff -r 80731832c62b -r 6f55e301c687 dataset.py --- a/dataset.py Thu May 15 15:21:00 2008 -0400 +++ b/dataset.py Fri May 16 16:38:07 2008 -0400 @@ -1015,7 +1015,31 @@ assert key in self.__dict__ # else it means we are trying to access a non-existing property return self.__dict__[key] - + def __iter__(self): + class ArrayDataSetIterator2(object): + def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): + if fieldnames is None: fieldnames = dataset.fieldNames() + # store the resulting minibatch in a lookup-list of values + self.minibatch = LookupList(fieldnames,[0]*len(fieldnames)) + self.dataset=dataset + self.minibatch_size=minibatch_size + assert offset>=0 and offset=self.dataset.data.shape[0]: + raise StopIteration + sub_data = self.dataset.data[self.current] + self.minibatch._values = [sub_data[self.dataset.fields_columns[f]] for f in self.minibatch._names] + self.current+=self.minibatch_size + return self.minibatch + + return ArrayDataSetIterator2(self,self.fieldNames(),1,0,0) + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): class ArrayDataSetIterator(object): def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): @@ -1030,6 +1054,7 @@ def __iter__(self): return self def next(self): + #@todo: we suppose that MinibatchWrapAroundIterator stop the iterator sub_data = self.dataset.data[self.current:self.current+self.minibatch_size] self.minibatch._values = [sub_data[:,self.dataset.fields_columns[f]] for f in self.minibatch._names] self.current+=self.minibatch_size