# HG changeset patch # User James Bergstra # Date 1212523581 14400 # Node ID 19b14afe04b72f919bb5f3112b05a4682a1515ce # Parent 4ad6bc9b4f039e474b1e823683c33f3bb8876829# Parent aef979d5bad9637ec90112bee827eebc46a8895f merged diff -r 4ad6bc9b4f03 -r 19b14afe04b7 dataset.py --- a/dataset.py Tue Jun 03 16:05:28 2008 -0400 +++ b/dataset.py Tue Jun 03 16:06:21 2008 -0400 @@ -1150,6 +1150,7 @@ def __init__(self,dataset): self.dataset=dataset self.current=offset + self.all_fields = self.dataset.fieldNames()==fieldnames def __iter__(self): return self def next(self): upper = self.current+minibatch_size @@ -1161,7 +1162,7 @@ all_fields_minibatch = Example(self.dataset.fieldNames(), zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size])) self.current+=minibatch_size - if self.dataset.fieldNames()==fieldnames: + if self.all_fields: return all_fields_minibatch return Example(fieldnames,[all_fields_minibatch[name] for name in fieldnames]) return CacheIterator(self) @@ -1170,8 +1171,31 @@ if type(i)==int and len(self.cached_examples)>i: return self.cached_examples[i] else: - return DataSet.__getitem__(self,i) - + return self.source_dataset[i] + + def __iter__(self): + class CacheIteratorIter(object): + def __init__(self,dataset): + self.dataset=dataset + self.l = len(dataset) + self.current = 0 + self.fieldnames = self.dataset.fieldNames() + self.example = LookupList(self.fieldnames,[0]*len(self.fieldnames)) + def __iter__(self): return self + def next(self): + if self.current>=self.l: + raise StopIteration + cache_len = len(self.dataset.cached_examples) + if self.current>=cache_len: # whole minibatch is not already in cache + # cache everything from current length to upper + self.dataset.cached_examples.append( + self.dataset.source_dataset[self.current]) + self.example._values = self.dataset.cached_examples[self.current] + self.current+=1 + return self.example + + return CacheIteratorIter(self) + class ApplyFunctionDataSet(DataSet): """ A L{DataSet} that contains as fields the results of applying a diff -r 4ad6bc9b4f03 -r 19b14afe04b7 test_dataset.py --- a/test_dataset.py Tue Jun 03 16:05:28 2008 -0400 +++ b/test_dataset.py Tue Jun 03 16:06:21 2008 -0400 @@ -493,12 +493,11 @@ raise NotImplementedError() -def test_speed(): - print "test_speed" - import time - a2 = numpy.random.rand(100000,400) - ds = ArrayDataSet(a2,{'all':slice(0,a2.shape[1],1)}) +def test_speed(array, ds): + print "test_speed", ds.__class__ + mat = numpy.random.rand(400,100) + @print_timing def f_array_full(a): a+1 @@ -540,11 +539,13 @@ exs[0]+1 # ex[0]*mat - f_array_full(a2) - f_array_index(a2) - f_array_iter(a2) + f_array_full(array) + f_array_index(array) + f_array_iter(array) f_ds_index(ds) + f_ds_index(ds) + f_ds_iter(ds) f_ds_iter(ds) f_ds_mb1(ds,10) @@ -556,7 +557,6 @@ f_ds_mb2(ds,1000) f_ds_mb2(ds,10000) - del a2, ds if __name__=='__main__': test1()